Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // clang-format off
11 #ifndef TPETRA_DISTOBJECT_DEF_HPP
12 #define TPETRA_DISTOBJECT_DEF_HPP
13 
21 
22 #include "Tpetra_Distributor.hpp"
25 #include "Tpetra_Details_checkGlobalError.hpp"
27 #include "Tpetra_Util.hpp" // Details::createPrefix
28 #include "Teuchos_CommHelpers.hpp"
29 #include "Teuchos_TypeNameTraits.hpp"
30 #include <typeinfo>
31 #include <memory>
32 #include <sstream>
33 
34 namespace Tpetra {
35 
36  namespace { // (anonymous)
37  template<class DeviceType, class IndexType = size_t>
38  struct SumFunctor {
39  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
40  viewToSum_ (viewToSum) {}
41  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
42  lclSum += viewToSum_(i);
43  }
44  Kokkos::View<const size_t*, DeviceType> viewToSum_;
45  };
46 
47  template<class DeviceType, class IndexType = size_t>
48  size_t
49  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
50  {
51  using Kokkos::parallel_reduce;
52  typedef DeviceType DT;
53  typedef typename DT::execution_space DES;
54  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
55 
56  const IndexType numOut = numImportPacketsPerLID.extent (0);
57  size_t totalImportPackets = 0;
58  parallel_reduce ("Count import packets",
59  range_type (0, numOut),
60  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
61  totalImportPackets);
62  return totalImportPackets;
63  }
64  } // namespace (anonymous)
65 
66 
67  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
69  DistObject (const Teuchos::RCP<const map_type>& map) :
70  map_ (map) {}
71 
72  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
73  std::string
75  description () const
76  {
77  using Teuchos::TypeNameTraits;
78 
79  std::ostringstream os;
80  os << "\"Tpetra::DistObject\": {"
81  << "Packet: " << TypeNameTraits<packet_type>::name ()
82  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
83  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
84  << ", Node: " << TypeNameTraits<Node>::name ();
85  if (this->getObjectLabel () != "") {
86  os << "Label: \"" << this->getObjectLabel () << "\"";
87  }
88  os << "}";
89  return os.str ();
90  }
91 
92  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
93  void
95  describe (Teuchos::FancyOStream &out,
96  const Teuchos::EVerbosityLevel verbLevel) const
97  {
98  using Teuchos::rcpFromRef;
99  using Teuchos::TypeNameTraits;
100  using std::endl;
101  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
102  Teuchos::VERB_LOW : verbLevel;
103  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
104  const int myRank = comm.is_null () ? 0 : comm->getRank ();
105  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
106 
107  if (vl != Teuchos::VERB_NONE) {
108  Teuchos::OSTab tab0 (out);
109  if (myRank == 0) {
110  out << "\"Tpetra::DistObject\":" << endl;
111  }
112  Teuchos::OSTab tab1 (out);
113  if (myRank == 0) {
114  out << "Template parameters:" << endl;
115  {
116  Teuchos::OSTab tab2 (out);
117  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
118  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
119  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
120  << "Node: " << TypeNameTraits<node_type>::name () << endl;
121  }
122  if (this->getObjectLabel () != "") {
123  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
124  }
125  } // if myRank == 0
126 
127  // Describe the Map.
128  {
129  if (myRank == 0) {
130  out << "Map:" << endl;
131  }
132  Teuchos::OSTab tab2 (out);
133  map_->describe (out, vl);
134  }
135 
136  // At verbosity > VERB_LOW, each process prints something.
137  if (vl > Teuchos::VERB_LOW) {
138  for (int p = 0; p < numProcs; ++p) {
139  if (myRank == p) {
140  out << "Process " << myRank << ":" << endl;
141  Teuchos::OSTab tab2 (out);
142  out << "Export buffer size (in packets): "
143  << exports_.extent (0)
144  << endl
145  << "Import buffer size (in packets): "
146  << imports_.extent (0)
147  << endl;
148  }
149  if (! comm.is_null ()) {
150  comm->barrier (); // give output time to finish
151  comm->barrier ();
152  comm->barrier ();
153  }
154  } // for each process rank p
155  } // if vl > VERB_LOW
156  } // if vl != VERB_NONE
157  }
158 
159  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
160  void
162  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
163  {
164  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
165  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
166  }
167 
168  /* These are provided in base DistObject template
169  template<class DistObjectType>
170  void
171  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
172  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
173  typename DistObjectType::global_ordinal_type,
174  typename DistObjectType::node_type> >& newMap)
175  {
176  input->removeEmptyProcessesInPlace (newMap);
177  if (newMap.is_null ()) { // my process is excluded
178  input = Teuchos::null;
179  }
180  }
181 
182  template<class DistObjectType>
183  void
184  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
185  {
186  using Teuchos::RCP;
187  typedef typename DistObjectType::local_ordinal_type LO;
188  typedef typename DistObjectType::global_ordinal_type GO;
189  typedef typename DistObjectType::node_type NT;
190  typedef Map<LO, GO, NT> map_type;
191 
192  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
193  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
194  }
195  */
196 
197  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
198  void
200  doImport (const SrcDistObject& source,
202  const CombineMode CM,
203  const bool restrictedMode)
204  {
205  using Details::Behavior;
206  using std::endl;
207  const char modeString[] = "doImport (forward mode)";
208 
209  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
210  // output to std::cerr on every MPI process. This is unwise for
211  // runs with large numbers of MPI processes.
212  const bool verbose = Behavior::verbose("DistObject");
213  std::unique_ptr<std::string> prefix;
214  if (verbose) {
215  prefix = this->createPrefix("DistObject", modeString);
216  std::ostringstream os;
217  os << *prefix << "Start" << endl;
218  std::cerr << os.str ();
219  }
220  this->beginImport(source, importer, CM, restrictedMode);
221  this->endImport(source, importer, CM, restrictedMode);
222  if (verbose) {
223  std::ostringstream os;
224  os << *prefix << "Done" << endl;
225  std::cerr << os.str ();
226  }
227  }
228 
229  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
230  void
232  doExport (const SrcDistObject& source,
234  const CombineMode CM,
235  const bool restrictedMode)
236  {
237  using Details::Behavior;
238  using std::endl;
239  const char modeString[] = "doExport (forward mode)";
240 
241  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
242  // output to std::cerr on every MPI process. This is unwise for
243  // runs with large numbers of MPI processes.
244  const bool verbose = Behavior::verbose("DistObject");
245  std::unique_ptr<std::string> prefix;
246  if (verbose) {
247  prefix = this->createPrefix("DistObject", modeString);
248  std::ostringstream os;
249  os << *prefix << "Start" << endl;
250  std::cerr << os.str ();
251  }
252  this->beginExport(source, exporter, CM, restrictedMode);
253  this->endExport(source, exporter, CM, restrictedMode);
254  if (verbose) {
255  std::ostringstream os;
256  os << *prefix << "Done" << endl;
257  std::cerr << os.str ();
258  }
259  }
260 
261  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
262  void
264  doImport (const SrcDistObject& source,
266  const CombineMode CM,
267  const bool restrictedMode)
268  {
269  using Details::Behavior;
270  using std::endl;
271  const char modeString[] = "doImport (reverse mode)";
272 
273  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
274  // output to std::cerr on every MPI process. This is unwise for
275  // runs with large numbers of MPI processes.
276  const bool verbose = Behavior::verbose("DistObject");
277  std::unique_ptr<std::string> prefix;
278  if (verbose) {
279  prefix = this->createPrefix("DistObject", modeString);
280  std::ostringstream os;
281  os << *prefix << "Start" << endl;
282  std::cerr << os.str ();
283  }
284  this->beginImport(source, exporter, CM, restrictedMode);
285  this->endImport(source, exporter, CM, restrictedMode);
286  if (verbose) {
287  std::ostringstream os;
288  os << *prefix << "Done" << endl;
289  std::cerr << os.str ();
290  }
291  }
292 
293  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
294  void
296  doExport (const SrcDistObject& source,
298  const CombineMode CM,
299  const bool restrictedMode)
300  {
301  using Details::Behavior;
302  using std::endl;
303  const char modeString[] = "doExport (reverse mode)";
304 
305  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
306  // output to std::cerr on every MPI process. This is unwise for
307  // runs with large numbers of MPI processes.
308  const bool verbose = Behavior::verbose("DistObject");
309  std::unique_ptr<std::string> prefix;
310  if (verbose) {
311  prefix = this->createPrefix("DistObject", modeString);
312  std::ostringstream os;
313  os << *prefix << "Start" << endl;
314  std::cerr << os.str ();
315  }
316  this->beginExport(source, importer, CM, restrictedMode);
317  this->endExport(source, importer, CM, restrictedMode);
318  if (verbose) {
319  std::ostringstream os;
320  os << *prefix << "Done" << endl;
321  std::cerr << os.str ();
322  }
323  }
324 
325  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
326  void
328  beginImport(const SrcDistObject& source,
330  const CombineMode CM,
331  const bool restrictedMode)
332  {
333  using Details::Behavior;
334  using std::endl;
335  const char modeString[] = "beginImport (forward mode)";
336 
337  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
338  // output to std::cerr on every MPI process. This is unwise for
339  // runs with large numbers of MPI processes.
340  const bool verbose = Behavior::verbose("DistObject");
341  std::unique_ptr<std::string> prefix;
342  if (verbose) {
343  prefix = this->createPrefix("DistObject", modeString);
344  std::ostringstream os;
345  os << *prefix << "Start" << endl;
346  std::cerr << os.str ();
347  }
348  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
349  if (verbose) {
350  std::ostringstream os;
351  os << *prefix << "Done" << endl;
352  std::cerr << os.str ();
353  }
354  }
355 
356  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
357  void
358  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
359  beginExport(const SrcDistObject& source,
360  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
361  const CombineMode CM,
362  const bool restrictedMode)
363  {
364  using Details::Behavior;
365  using std::endl;
366  const char modeString[] = "beginExport (forward mode)";
367 
368  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
369  // output to std::cerr on every MPI process. This is unwise for
370  // runs with large numbers of MPI processes.
371  const bool verbose = Behavior::verbose("DistObject");
372  std::unique_ptr<std::string> prefix;
373  if (verbose) {
374  prefix = this->createPrefix("DistObject", modeString);
375  std::ostringstream os;
376  os << *prefix << "Start" << endl;
377  std::cerr << os.str ();
378  }
379  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
380  if (verbose) {
381  std::ostringstream os;
382  os << *prefix << "Done" << endl;
383  std::cerr << os.str ();
384  }
385  }
386 
387  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
388  void
389  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
390  beginImport(const SrcDistObject& source,
391  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
392  const CombineMode CM,
393  const bool restrictedMode)
394  {
395  using Details::Behavior;
396  using std::endl;
397  const char modeString[] = "beginImport (reverse mode)";
398 
399  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
400  // output to std::cerr on every MPI process. This is unwise for
401  // runs with large numbers of MPI processes.
402  const bool verbose = Behavior::verbose("DistObject");
403  std::unique_ptr<std::string> prefix;
404  if (verbose) {
405  prefix = this->createPrefix("DistObject", modeString);
406  std::ostringstream os;
407  os << *prefix << "Start" << endl;
408  std::cerr << os.str ();
409  }
410  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
411  if (verbose) {
412  std::ostringstream os;
413  os << *prefix << "Done" << endl;
414  std::cerr << os.str ();
415  }
416  }
417 
418  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
419  void
420  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
421  beginExport(const SrcDistObject& source,
422  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
423  const CombineMode CM,
424  const bool restrictedMode)
425  {
426  using Details::Behavior;
427  using std::endl;
428  const char modeString[] = "beginExport (reverse mode)";
429 
430  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
431  // output to std::cerr on every MPI process. This is unwise for
432  // runs with large numbers of MPI processes.
433  const bool verbose = Behavior::verbose("DistObject");
434  std::unique_ptr<std::string> prefix;
435  if (verbose) {
436  prefix = this->createPrefix("DistObject", modeString);
437  std::ostringstream os;
438  os << *prefix << "Start" << endl;
439  std::cerr << os.str ();
440  }
441  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
442  if (verbose) {
443  std::ostringstream os;
444  os << *prefix << "Done" << endl;
445  std::cerr << os.str ();
446  }
447  }
448 
449  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
450  void
451  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
452  endImport(const SrcDistObject& source,
453  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
454  const CombineMode CM,
455  const bool restrictedMode)
456  {
457  using Details::Behavior;
458  using std::endl;
459  const char modeString[] = "endImport (forward mode)";
460 
461  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
462  // output to std::cerr on every MPI process. This is unwise for
463  // runs with large numbers of MPI processes.
464  const bool verbose = Behavior::verbose("DistObject");
465  std::unique_ptr<std::string> prefix;
466  if (verbose) {
467  prefix = this->createPrefix("DistObject", modeString);
468  std::ostringstream os;
469  os << *prefix << "Start" << endl;
470  std::cerr << os.str ();
471  }
472  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
473  if (verbose) {
474  std::ostringstream os;
475  os << *prefix << "Done" << endl;
476  std::cerr << os.str ();
477  }
478  }
479 
480  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
481  void
482  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
483  endExport(const SrcDistObject& source,
484  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
485  const CombineMode CM,
486  const bool restrictedMode)
487  {
488  using Details::Behavior;
489  using std::endl;
490  const char modeString[] = "endExport (forward mode)";
491 
492  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
493  // output to std::cerr on every MPI process. This is unwise for
494  // runs with large numbers of MPI processes.
495  const bool verbose = Behavior::verbose("DistObject");
496  std::unique_ptr<std::string> prefix;
497  if (verbose) {
498  prefix = this->createPrefix("DistObject", modeString);
499  std::ostringstream os;
500  os << *prefix << "Start" << endl;
501  std::cerr << os.str ();
502  }
503  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
504  if (verbose) {
505  std::ostringstream os;
506  os << *prefix << "Done" << endl;
507  std::cerr << os.str ();
508  }
509  }
510 
511  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
512  void
513  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
514  endImport(const SrcDistObject& source,
515  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
516  const CombineMode CM,
517  const bool restrictedMode)
518  {
519  using Details::Behavior;
520  using std::endl;
521  const char modeString[] = "endImport (reverse mode)";
522 
523  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
524  // output to std::cerr on every MPI process. This is unwise for
525  // runs with large numbers of MPI processes.
526  const bool verbose = Behavior::verbose("DistObject");
527  std::unique_ptr<std::string> prefix;
528  if (verbose) {
529  prefix = this->createPrefix("DistObject", modeString);
530  std::ostringstream os;
531  os << *prefix << "Start" << endl;
532  std::cerr << os.str ();
533  }
534  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
535  if (verbose) {
536  std::ostringstream os;
537  os << *prefix << "Done" << endl;
538  std::cerr << os.str ();
539  }
540  }
541 
542  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
543  void
544  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
545  endExport(const SrcDistObject& source,
546  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
547  const CombineMode CM,
548  const bool restrictedMode)
549  {
550  using Details::Behavior;
551  using std::endl;
552  const char modeString[] = "endExport (reverse mode)";
553 
554  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
555  // output to std::cerr on every MPI process. This is unwise for
556  // runs with large numbers of MPI processes.
557  const bool verbose = Behavior::verbose("DistObject");
558  std::unique_ptr<std::string> prefix;
559  if (verbose) {
560  prefix = this->createPrefix("DistObject", modeString);
561  std::ostringstream os;
562  os << *prefix << "Start" << endl;
563  std::cerr << os.str ();
564  }
565  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
566  if (verbose) {
567  std::ostringstream os;
568  os << *prefix << "Done" << endl;
569  std::cerr << os.str ();
570  }
571  }
572 
// Returns whatever distributorActor_.isReady() reports.
//
// NOTE(review): the two declarator lines between the return type and
// the body (embedded line numbers 575-576: the class qualifier and the
// function name) are missing from this listing.  Presumably this is
// DistObject<...>::transferArrived() const -- confirm against the
// upstream header before relying on the name.
573  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
574  bool
577  return distributorActor_.isReady();
578  }
579 
580  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
581  bool
583  isDistributed () const {
584  return map_->isDistributed ();
585  }
586 
// Default implementation that always returns 0.
//
// NOTE(review): the two declarator lines between the return type and
// the body (embedded line numbers 589-590: the class qualifier and the
// function name) are missing from this listing.  Presumably this is
// DistObject<...>::constantNumberOfPackets() const, which beginTransfer
// calls later in this file -- confirm against the upstream header.
587  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
588  size_t
591  return 0; // default implementation; subclasses may override
592  }
593 
// Perform a complete transfer by calling beginTransfer and then
// endTransfer with the same arguments.
//
// NOTE(review): the two lines after the return type (embedded line
// numbers 596-597: the class qualifier, and the function name with its
// first parameter) are missing from this listing.  The body shows the
// first parameter is named src; presumably this is
// DistObject<...>::doTransfer (const SrcDistObject& src, ...) --
// confirm against the upstream header.
594  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
595  void
598  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
599  const char modeString[],
600  const ReverseOption revOp,
601  const CombineMode CM,
602  bool restrictedMode)
603  {
604  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
605  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
606  }
607 
608  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
609  bool
611  reallocImportsIfNeeded (const size_t newSize,
612  const bool verbose,
613  const std::string* prefix,
614  const bool /*remoteLIDsContiguous*/,
615  const CombineMode /*CM*/)
616  {
617  if (verbose) {
618  std::ostringstream os;
619  os << *prefix << "Realloc (if needed) imports_ from "
620  << imports_.extent (0) << " to " << newSize << std::endl;
621  std::cerr << os.str ();
622  }
624  const bool reallocated =
625  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
626  if (verbose) {
627  std::ostringstream os;
628  os << *prefix << "Finished realloc'ing imports_" << std::endl;
629  std::cerr << os.str ();
630  }
631  return reallocated;
632  }
633 
634  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
635  bool
637  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
638  const size_t numImportLIDs)
639  {
640  using Details::Behavior;
643  using std::endl;
644  // If an array is already allocated, and if is at least
645  // tooBigFactor times bigger than it needs to be, free it and
646  // reallocate to the size we need, in order to save space.
647  // Otherwise, take subviews to reduce allocation size.
648  constexpr size_t tooBigFactor = 10;
649 
650  const bool verbose = Behavior::verbose("DistObject");
651  std::unique_ptr<std::string> prefix;
652  if (verbose) {
653  prefix = this->createPrefix("DistObject",
654  "reallocArraysForNumPacketsPerLid");
655  std::ostringstream os;
656  os << *prefix
657  << "numExportLIDs: " << numExportLIDs
658  << ", numImportLIDs: " << numImportLIDs
659  << endl;
660  os << *prefix << "DualView status before:" << endl
661  << *prefix
662  << dualViewStatusToString (this->numExportPacketsPerLID_,
663  "numExportPacketsPerLID_")
664  << endl
665  << *prefix
666  << dualViewStatusToString (this->numImportPacketsPerLID_,
667  "numImportPacketsPerLID_")
668  << endl;
669  std::cerr << os.str ();
670  }
671 
672  // Reallocate numExportPacketsPerLID_ if needed.
673  const bool firstReallocated =
674  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
675  numExportLIDs,
676  "numExportPacketsPerLID",
677  tooBigFactor,
678  true); // need fence before, if realloc'ing
679 
680  // If we reallocated above, then we fenced after that
681  // reallocation. This means that we don't need to fence again,
682  // before the next reallocation.
683  const bool needFenceBeforeNextAlloc = ! firstReallocated;
684  const bool secondReallocated =
685  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
686  numImportLIDs,
687  "numImportPacketsPerLID",
688  tooBigFactor,
689  needFenceBeforeNextAlloc);
690 
691  if (verbose) {
692  std::ostringstream os;
693  os << *prefix << "DualView status after:" << endl
694  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
695  "numExportPacketsPerLID_")
696  << endl
697  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
698  "numImportPacketsPerLID_")
699  << endl;
700  std::cerr << os.str ();
701  }
702 
703  return firstReallocated || secondReallocated;
704  }
705 
706  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
707  void
710  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
711  const char modeString[],
712  const ReverseOption revOp,
713  const CombineMode CM,
714  bool restrictedMode)
715  {
716  using Details::Behavior;
720  using Kokkos::Compat::getArrayView;
721  using Kokkos::Compat::getConstArrayView;
722  using Kokkos::Compat::getKokkosViewDeepCopy;
723  using Kokkos::Compat::create_const_view;
724  using std::endl;
727 
728  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
729  const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
730  const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
731  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
732 
733  ProfilingRegion region_doTransfer(funcName);
734  const bool verbose = Behavior::verbose("DistObject");
735  std::shared_ptr<std::string> prefix;
736  if (verbose) {
737  std::ostringstream os;
738  prefix = this->createPrefix("DistObject", "doTransfer");
739  os << *prefix << "Source type: " << Teuchos::typeName(src)
740  << ", Target type: " << Teuchos::typeName(*this) << endl;
741  std::cerr << os.str();
742  }
743 
744  // "Restricted Mode" does two things:
745  // 1) Skips copyAndPermute
746  // 2) Allows the "target" Map of the transfer to be a subset of
747  // the Map of *this, in a "locallyFitted" sense.
748  //
749  // This cannot be used if #2 is not true, OR there are permutes.
750  // Source Maps still need to match
751 
752  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
753  // checks. These may communicate more.
754  const bool debug = Behavior::debug("DistObject");
755  if (debug) {
756  if (! restrictedMode && revOp == DoForward) {
757  const bool myMapSameAsTransferTgtMap =
758  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
759  TEUCHOS_TEST_FOR_EXCEPTION
760  (! myMapSameAsTransferTgtMap, std::invalid_argument,
761  "Tpetra::DistObject::" << modeString << ": For forward-mode "
762  "communication, the target DistObject's Map must be the same "
763  "(in the sense of Tpetra::Map::isSameAs) as the input "
764  "Export/Import object's target Map.");
765  }
766  else if (! restrictedMode && revOp == DoReverse) {
767  const bool myMapSameAsTransferSrcMap =
768  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
769  TEUCHOS_TEST_FOR_EXCEPTION
770  (! myMapSameAsTransferSrcMap, std::invalid_argument,
771  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
772  "communication, the target DistObject's Map must be the same "
773  "(in the sense of Tpetra::Map::isSameAs) as the input "
774  "Export/Import object's source Map.");
775  }
776  else if (restrictedMode && revOp == DoForward) {
777  const bool myMapLocallyFittedTransferTgtMap =
778  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
779  TEUCHOS_TEST_FOR_EXCEPTION
780  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
781  "Tpetra::DistObject::" << modeString << ": For forward-mode "
782  "communication using restricted mode, Export/Import object's "
783  "target Map must be locally fitted (in the sense of "
784  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
785  }
786  else { // if (restrictedMode && revOp == DoReverse)
787  const bool myMapLocallyFittedTransferSrcMap =
788  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
789  TEUCHOS_TEST_FOR_EXCEPTION
790  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
791  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
792  "communication using restricted mode, Export/Import object's "
793  "source Map must be locally fitted (in the sense of "
794  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
795  }
796 
797  // SrcDistObject need not even _have_ Maps. However, if the
798  // source object is a DistObject, it has a Map, and we may
799  // compare that Map with the Transfer's Maps.
800  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
801  if (srcDistObj != nullptr) {
802  if (revOp == DoForward) {
803  const bool srcMapSameAsImportSrcMap =
804  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
805  TEUCHOS_TEST_FOR_EXCEPTION
806  (! srcMapSameAsImportSrcMap, std::invalid_argument,
807  "Tpetra::DistObject::" << modeString << ": For forward-mode "
808  "communication, the source DistObject's Map must be the same "
809  "as the input Export/Import object's source Map.");
810  }
811  else { // revOp == DoReverse
812  const bool srcMapSameAsImportTgtMap =
813  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
814  TEUCHOS_TEST_FOR_EXCEPTION
815  (! srcMapSameAsImportTgtMap, std::invalid_argument,
816  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
817  "communication, the source DistObject's Map must be the same "
818  "as the input Export/Import object's target Map.");
819  }
820  }
821  }
822 
823  const size_t numSameIDs = transfer.getNumSameIDs ();
824  Distributor& distor = transfer.getDistributor ();
825  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
826 
827  TEUCHOS_TEST_FOR_EXCEPTION
828  (debug && restrictedMode &&
829  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
830  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
831  std::invalid_argument,
832  "Tpetra::DistObject::" << modeString << ": Transfer object "
833  "cannot have permutes in restricted mode.");
834 
835  // Do we need all communication buffers to live on host?
836  if (verbose) {
837  std::ostringstream os;
838  os << *prefix << "doTransfer: Use new interface; "
839  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
840  std::cerr << os.str ();
841  }
842 
843  using const_lo_dv_type =
844  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
845  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
846  transfer.getPermuteToLIDs_dv () :
847  transfer.getPermuteFromLIDs_dv ();
848  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
849  transfer.getPermuteFromLIDs_dv () :
850  transfer.getPermuteToLIDs_dv ();
851  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
852  transfer.getRemoteLIDs_dv () :
853  transfer.getExportLIDs_dv ();
854  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
855  transfer.getExportLIDs_dv () :
856  transfer.getRemoteLIDs_dv ();
857  const bool canTryAliasing = (revOp == DoForward) ?
858  transfer.areRemoteLIDsContiguous() :
859  transfer.areExportLIDsContiguous();
860  // const bool canTryAliasing = false;
861 
862  ProfilingRegion region_dTN(funcName);
863 
864  if (verbose) {
865  std::ostringstream os;
866  os << *prefix << "Input arguments:" << endl
867  << *prefix << " combineMode: " << combineModeToString (CM) << endl
868  << *prefix << " numSameIDs: " << numSameIDs << endl
869  << *prefix << " "
870  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
871  << *prefix << " "
872  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
873  << *prefix << " "
874  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
875  << *prefix << " "
876  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
877  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
878  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
879  std::cerr << os.str ();
880  }
881 
882  {
883  ProfilingRegion region_cs ("Tpetra::DistObject::beginTransfer::checkSizes");
884  if (verbose) {
885  std::ostringstream os;
886  os << *prefix << "1. checkSizes" << endl;
887  std::cerr << os.str ();
888  }
889  const bool checkSizesResult = this->checkSizes (src);
890  TEUCHOS_TEST_FOR_EXCEPTION
891  (! checkSizesResult, std::invalid_argument,
892  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
893  "destination object is not a legal target for redistribution from the "
894  "source object. This probably means that they do not have the same "
895  "dimensions. For example, MultiVectors must have the same number of "
896  "rows and columns.");
897  }
898 
899  // The method may return zero even if the implementation actually
900  // does have a constant number of packets per LID. However, if it
901  // returns nonzero, we may use this information to avoid
902  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
903  // will set this to its final value.
904  //
905  // We only need this if CM != ZERO, but it has to be lifted out of
906  // that scope because there are multiple tests for CM != ZERO.
907  size_t constantNumPackets = this->constantNumberOfPackets ();
908  if (verbose) {
909  std::ostringstream os;
910  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
911  std::cerr << os.str ();
912  }
913 
914  // Do we need to do communication?
915  bool needCommunication = true;
916  // We only need to send data if the combine mode is not ZERO.
917  if (CM != ZERO) {
918  // This may be NULL. It will be used below.
919  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
920 
921  if (revOp == DoReverse && ! this->isDistributed ()) {
922  needCommunication = false;
923  }
924  // FIXME (mfh 30 Jun 2013): Checking whether the source object
925  // is distributed requires a cast to DistObject. If it's not a
926  // DistObject, then I'm not quite sure what to do. Perhaps it
927  // would be more appropriate for SrcDistObject to have an
928  // isDistributed() method. For now, I'll just assume that we
929  // need to do communication unless the cast succeeds and the
930  // source is not distributed.
931  else if (revOp == DoForward && srcDistObj != NULL &&
932  ! srcDistObj->isDistributed ()) {
933  needCommunication = false;
934  }
935  } // if (CM != ZERO)
936  else {
937  needCommunication = false;
938  }
939 
940  // The operations for the transfer can be performed in different
941  // order. The "safe" way is
942  //
943  // - copyAndPermute |
944  // - packAndPrepare |--- beginTransfer
945  // - doPostRecvs |
946  // - doPostSends |
947  //
948  // - doWaitsRecv |
949  // - unpackAndCombine |--- endTransfer
950  // - doWaitsSend |
951 
952  // This is "safe" because the local computation steps
953  // copyAndPermute and packAndPrepare are free to run on host or
954  // device provided that the data is appropriately synced.
955  // Afterwards, all the communication options can run independently
956  // of the computation. This means that there are no constraints in
957  // terms of memory spaces out of which the different steps need to
958  // run.
959 
960  // However, for performance it can be beneficial to overlap
961  // communication and computation, leading to this sequence of
962  // operations:
963  //
964  // - doPostRecvs |
965  // - packAndPrepare |--- beginTransfer
966  // - doPostSends |
967  // - copyAndPermute |
968  //
969  // - doWaitsRecv |
970  // - unpackAndCombine |--- endTransfer
971  // - doWaitsSend |
972  //
973  // Note that this is not the same as overlap of communication and
974  // computation in the sparse matrix-vector product which would involve
975  // performing computation between beginTransfer and endTransfer.
976  //
977  // The second approach has two advantages:
978  // 1) Receives and sends are separated by computation. This
979  // decreases the likelihood of MPI having to allocate temporary
980  // buffers for unexpectedly received messages.
981  // 2) Sends and doWaitsRecv in endTransfer are separated by
982  // copyAndPermute, giving MPI time to make progress.
983  //
984  // The downside of this approach is as follows. The imports view
985  // used for the receives is potentially aliased to a subview of
986  // the target. This means that MPI will modify the target in the
987  // memory space that is determined by GPU awareness
988  // (Behavior::assumeMpiIsGPUAware). Since copyAndPermute will also
989  // be writing to the target at the same time, it will need to
990  // modify target in the same space.
991  //
992  // Given these additional constraints, we currently only enable
993  // the overlapping of communication and computation when constantNumPackets > 0
994  // and Behavior::enableGranularTransfers().
995 
996  const bool overlapTransferSteps = (constantNumPackets != 0) && Behavior::enableGranularTransfers();
997 
998  if (verbose) {
999  std::ostringstream os;
1000  os << *prefix << "overlapTransferSteps=" << overlapTransferSteps << endl;
1001  std::cerr << os.str ();
1002  }
1003 
1004  // Decide whether copyAndPermute needs to be run.
1005  const bool thereAreIDsToCopy = (numSameIDs + permuteToLIDs.extent (0) != 0);
1006  const bool needCopyAndPermute = (!restrictedMode && thereAreIDsToCopy);
1007 
1008  if ( ! overlapTransferSteps ) {
1009 
1011  // copyAndPermute
1012 
1013  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
1014  // that if CM == INSERT || CM == REPLACE, the target object could
1015  // be write only. We don't optimize for that here.
1016 
1017  if ( needCopyAndPermute ) {
1018  // There is at least one GID to copy or permute.
1019 
1020  if (verbose) {
1021  std::ostringstream os;
1022  os << *prefix << "2. copyAndPermute" << endl;
1023  std::cerr << os.str ();
1024  }
1025  {
1026  ProfilingRegion region_cp ("Tpetra::DistObject::beginTransfer::copyAndPermute");
1027 
1028  this->copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM);
1029  }
1030  if (verbose) {
1031  std::ostringstream os;
1032  os << *prefix << "After copyAndPermute:" << endl
1033  << *prefix << " "
1034  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1035  << endl
1036  << *prefix << " "
1037  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1038  << endl;
1039  std::cerr << os.str ();
1040  }
1041  }
1042 
1043  if ( ! needCommunication ) {
1044  if (verbose) {
1045  std::ostringstream os;
1046  os << *prefix << "Comm not needed; skipping" << endl;
1047  std::cerr << os.str ();
1048  }
1049  }
1050  else {
1052  // packAndPrepare
1053 
1054  if (constantNumPackets == 0) {
1055  if (verbose) {
1056  std::ostringstream os;
1057  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1058  << endl;
1059  std::cerr << os.str ();
1060  }
1061  // This only reallocates if necessary, that is, if the sizes
1062  // don't match.
1063  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1064  remoteLIDs.extent (0));
1065  }
1066 
1067  if (verbose) {
1068  std::ostringstream os;
1069  os << *prefix << "4. packAndPrepare: before, "
1070  << dualViewStatusToString (this->exports_, "exports_")
1071  << endl;
1072  std::cerr << os.str ();
1073  }
1074 
1075  doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1076  if (commOnHost) {
1077  this->exports_.sync_host();
1078  }
1079  else {
1080  this->exports_.sync_device();
1081  }
1082 
1083  if (verbose) {
1084  std::ostringstream os;
1085  os << *prefix << "5.1. After packAndPrepare, "
1086  << dualViewStatusToString (this->exports_, "exports_")
1087  << endl;
1088  std::cerr << os.str ();
1089  }
1090 
1092  // reallocImportsIfNeeded
1093  if (constantNumPackets != 0) {
1094  ProfilingRegion region_reallocImportsIfNeeded("Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1095 
1096  // There are a constant number of packets per element. We
1097  // already know (from the number of "remote" (incoming)
1098  // elements) how many incoming elements we expect, so we can
1099  // resize the buffer accordingly.
1100  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1101  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1102  }
1103 
1105  // doPostRecvs
1106 
1107  // If only one round of communication is required: post receives.
1108  // If two rounds are required: complete first round and post receives for second round.
1109  if (verbose) {
1110  std::ostringstream os;
1111  os << *prefix << "7.0. "
1112  << (revOp == DoReverse ? "Reverse" : "Forward")
1113  << " mode" << endl;
1114  std::cerr << os.str ();
1115  }
1116 
1117  doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1118 
1120  // doPostSends
1121 
1122  doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1123  } // if ( needCommunication )
1124 
1125  } // if ( ! overlapTransferSteps )
1126  else {
1127 
1128  // Overlap local computation with communication
1129 
1130  if ( ! needCommunication ) {
1131  if (verbose) {
1132  std::ostringstream os;
1133  os << *prefix << "Comm not needed; skipping" << endl;
1134  std::cerr << os.str ();
1135  }
1136  }
1137  else {
1138 
1140  // doPostRecvs
1141 
1143  // reallocImportsIfNeeded
1144  if (constantNumPackets != 0) {
1145  ProfilingRegion region_reallocImportsIfNeeded("Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1146 
1147  // There are a constant number of packets per element. We
1148  // already know (from the number of "remote" (incoming)
1149  // elements) how many incoming elements we expect, so we can
1150  // resize the buffer accordingly.
1151  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1152  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1153  }
1154 
1155  // If only one round of communication is required: post receives.
1156  // If two rounds are required: complete first round and post receives for second round.
1157  if (verbose) {
1158  std::ostringstream os;
1159  os << *prefix << "7.0. "
1160  << (revOp == DoReverse ? "Reverse" : "Forward")
1161  << " mode" << endl;
1162  std::cerr << os.str ();
1163  }
1164 
1165  doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1166 
1168  // packAndPrepare
1169 
1170  if (constantNumPackets == 0) {
1171  if (verbose) {
1172  std::ostringstream os;
1173  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1174  << endl;
1175  std::cerr << os.str ();
1176  }
1177  // This only reallocates if necessary, that is, if the sizes
1178  // don't match.
1179  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1180  remoteLIDs.extent (0));
1181  }
1182 
1183  if (verbose) {
1184  std::ostringstream os;
1185  os << *prefix << "4. packAndPrepare: before, "
1186  << dualViewStatusToString (this->exports_, "exports_")
1187  << endl;
1188  std::cerr << os.str ();
1189  }
1190 
1191  doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1192 
1193  if (commOnHost) {
1194  this->exports_.sync_host();
1195  }
1196  else {
1197  this->exports_.sync_device();
1198  }
1199 
1200  if (verbose) {
1201  std::ostringstream os;
1202  os << *prefix << "5.1. After packAndPrepare, "
1203  << dualViewStatusToString (this->exports_, "exports_")
1204  << endl;
1205  std::cerr << os.str ();
1206  }
1207 
1209  // doPostSends
1210 
1211  doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1212 
1213  } // if ( needCommunication )
1214 
1216  // copyAndPermute
1217 
1218  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
1219  // that if CM == INSERT || CM == REPLACE, the target object could
1220  // be write only. We don't optimize for that here.
1221 
1222  if ( needCopyAndPermute ) {
1223  // There is at least one GID to copy or permute.
1224  if (verbose) {
1225  std::ostringstream os;
1226  os << *prefix << "2. copyAndPermute" << endl;
1227  std::cerr << os.str ();
1228  }
1229 
1230  {
1231  ProfilingRegion region_cp("Tpetra::DistObject::beginTransfer::copyAndPermute");
1232 
1233  this->copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM);
1234  }
1235 
1236  if (verbose) {
1237  std::ostringstream os;
1238  os << *prefix << "After copyAndPermute:" << endl
1239  << *prefix << " "
1240  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1241  << endl
1242  << *prefix << " "
1243  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1244  << endl;
1245  std::cerr << os.str ();
1246  }
1247  }
1248  } // if ( overlapTransferSteps )
1249  }
1250 
1251  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1252  void
1254  endTransfer(const SrcDistObject& src,
1255  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1256  const char modeString[],
1257  const ReverseOption revOp,
1258  const CombineMode CM,
1259  bool restrictedMode)
1260  {
1261  using Details::Behavior;
1265  using Kokkos::Compat::getArrayView;
1266  using Kokkos::Compat::getConstArrayView;
1267  using Kokkos::Compat::getKokkosViewDeepCopy;
1268  using Kokkos::Compat::create_const_view;
1269  using std::endl;
1272 
1273  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1274  const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1275  const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1276  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1277  ProfilingRegion region_doTransfer(funcName);
1278  const bool verbose = Behavior::verbose("DistObject");
1279  std::shared_ptr<std::string> prefix;
1280  if (verbose) {
1281  std::ostringstream os;
1282  prefix = this->createPrefix("DistObject", "doTransfer");
1283  os << *prefix << "Source type: " << Teuchos::typeName(src)
1284  << ", Target type: " << Teuchos::typeName(*this) << endl;
1285  std::cerr << os.str();
1286  }
1287 
1288  // "Restricted Mode" does two things:
1289  // 1) Skips copyAndPermute
1290  // 2) Allows the "target" Map of the transfer to be a subset of
1291  // the Map of *this, in a "locallyFitted" sense.
1292  //
1293  // This cannot be used if #2 is not true, OR there are permutes.
1294  // Source Maps still need to match
1295 
1296  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1297  // checks. These may communicate more.
1298  const bool debug = Behavior::debug("DistObject");
1299  if (debug) {
1300  if (! restrictedMode && revOp == DoForward) {
1301  const bool myMapSameAsTransferTgtMap =
1302  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1303  TEUCHOS_TEST_FOR_EXCEPTION
1304  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1305  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1306  "communication, the target DistObject's Map must be the same "
1307  "(in the sense of Tpetra::Map::isSameAs) as the input "
1308  "Export/Import object's target Map.");
1309  }
1310  else if (! restrictedMode && revOp == DoReverse) {
1311  const bool myMapSameAsTransferSrcMap =
1312  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1313  TEUCHOS_TEST_FOR_EXCEPTION
1314  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1315  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1316  "communication, the target DistObject's Map must be the same "
1317  "(in the sense of Tpetra::Map::isSameAs) as the input "
1318  "Export/Import object's source Map.");
1319  }
1320  else if (restrictedMode && revOp == DoForward) {
1321  const bool myMapLocallyFittedTransferTgtMap =
1322  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1323  TEUCHOS_TEST_FOR_EXCEPTION
1324  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1325  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1326  "communication using restricted mode, Export/Import object's "
1327  "target Map must be locally fitted (in the sense of "
1328  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1329  }
1330  else { // if (restrictedMode && revOp == DoReverse)
1331  const bool myMapLocallyFittedTransferSrcMap =
1332  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1333  TEUCHOS_TEST_FOR_EXCEPTION
1334  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1335  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1336  "communication using restricted mode, Export/Import object's "
1337  "source Map must be locally fitted (in the sense of "
1338  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1339  }
1340 
1341  // SrcDistObject need not even _have_ Maps. However, if the
1342  // source object is a DistObject, it has a Map, and we may
1343  // compare that Map with the Transfer's Maps.
1344  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1345  if (srcDistObj != nullptr) {
1346  if (revOp == DoForward) {
1347  const bool srcMapSameAsImportSrcMap =
1348  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1349  TEUCHOS_TEST_FOR_EXCEPTION
1350  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1351  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1352  "communication, the source DistObject's Map must be the same "
1353  "as the input Export/Import object's source Map.");
1354  }
1355  else { // revOp == DoReverse
1356  const bool srcMapSameAsImportTgtMap =
1357  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1358  TEUCHOS_TEST_FOR_EXCEPTION
1359  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1360  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1361  "communication, the source DistObject's Map must be the same "
1362  "as the input Export/Import object's target Map.");
1363  }
1364  }
1365  }
1366 
1367  Distributor& distor = transfer.getDistributor ();
1368  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1369 
1370  TEUCHOS_TEST_FOR_EXCEPTION
1371  (debug && restrictedMode &&
1372  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1373  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1374  std::invalid_argument,
1375  "Tpetra::DistObject::" << modeString << ": Transfer object "
1376  "cannot have permutes in restricted mode.");
1377 
1378  // Do we need all communication buffers to live on host?
1379  if (verbose) {
1380  std::ostringstream os;
1381  os << *prefix << "doTransfer: Use new interface; "
1382  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1383  std::cerr << os.str ();
1384  }
1385 
1386  using const_lo_dv_type =
1387  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1388  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1389  transfer.getRemoteLIDs_dv () :
1390  transfer.getExportLIDs_dv ();
1391 
1392  size_t constantNumPackets = this->constantNumberOfPackets ();
1393 
1394  // We only need to send data if the combine mode is not ZERO.
1395  if (CM != ZERO) {
1396  // Do we need to do communication (via doWaitsRecv and doWaitsSend)?
1397  bool needCommunication = true;
1398 
1399  // This may be NULL. It will be used below.
1400  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1401 
1402  if (revOp == DoReverse && ! this->isDistributed ()) {
1403  needCommunication = false;
1404  }
1405  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1406  // is distributed requires a cast to DistObject. If it's not a
1407  // DistObject, then I'm not quite sure what to do. Perhaps it
1408  // would be more appropriate for SrcDistObject to have an
1409  // isDistributed() method. For now, I'll just assume that we
1410  // need to do communication unless the cast succeeds and the
1411  // source is not distributed.
1412  else if (revOp == DoForward && srcDistObj != NULL &&
1413  ! srcDistObj->isDistributed ()) {
1414  needCommunication = false;
1415  }
1416 
1417  if (! needCommunication) {
1418  if (verbose) {
1419  std::ostringstream os;
1420  os << *prefix << "Comm not needed; skipping" << endl;
1421  std::cerr << os.str ();
1422  }
1423  }
1424  else {
1425  distributorActor_.doWaitsRecv(distributorPlan);
1426 
1427  if (verbose) {
1428  std::ostringstream os;
1429  os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1430  std::cerr << os.str ();
1431  }
1432  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1433 
1434  distributorActor_.doWaitsSend(distributorPlan);
1435  } // if (needCommunication)
1436  } // if (CM != ZERO)
1437 
1438  if (verbose) {
1439  std::ostringstream os;
1440  os << *prefix << "9. Done!" << endl;
1441  std::cerr << os.str ();
1442  }
1443 
1444  if (verbose) {
1445  std::ostringstream os;
1446  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1447  std::cerr << os.str ();
1448  }
1449  }
1450 
1451  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1452  void
1453  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1454  doPostRecvs(const Details::DistributorPlan& distributorPlan,
1455  size_t constantNumPackets,
1456  bool commOnHost,
1457  std::shared_ptr<std::string> prefix,
1458  const bool canTryAliasing,
1459  const CombineMode CM)
1460  {
1463  using Kokkos::Compat::create_const_view;
1464  using std::endl;
1465  using Details::ProfilingRegion;
1466 
1467  const char funcNameHost[] = "Tpetra::DistObject::doPostRecvs[Host]";
1468  const char funcNameDevice[] = "Tpetra::DistObject::doPostRecvs[Device]";
1469  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1470  ProfilingRegion region_dpr (funcName);
1471 
1472  const bool verbose = Details::Behavior::verbose("DistObject");
1473 
1474  if (constantNumPackets == 0) { // variable num packets per LID
1475  if (verbose) {
1476  std::ostringstream os;
1477  os << *prefix << "7.1. Variable # packets / LID: first comm "
1478  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1479  << endl;
1480  std::cerr << os.str ();
1481  }
1482  size_t totalImportPackets = 0;
1483  if (commOnHost) {
1484  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1485  this->numExportPacketsPerLID_.sync_host ();
1486  }
1487  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1488  this->numImportPacketsPerLID_.sync_host ();
1489  }
1490  this->numImportPacketsPerLID_.modify_host (); // out arg
1491  auto numExp_h =
1492  create_const_view (this->numExportPacketsPerLID_.view_host ());
1493  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1494 
1495  // MPI communication happens here.
1496  if (verbose) {
1497  std::ostringstream os;
1498  os << *prefix << "Call doPostsAndWaits"
1499  << endl;
1500  std::cerr << os.str ();
1501  }
1502  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1503 
1504  if (verbose) {
1505  std::ostringstream os;
1506  os << *prefix << "Count totalImportPackets" << std::endl;
1507  std::cerr << os.str ();
1508  }
1509  using the_dev_type = typename decltype (numImp_h)::device_type;
1510  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1511  }
1512  else { // ! commOnHost
1513  this->numExportPacketsPerLID_.sync_device ();
1514  this->numImportPacketsPerLID_.sync_device ();
1515  this->numImportPacketsPerLID_.modify_device (); // out arg
1516  auto numExp_d = create_const_view
1517  (this->numExportPacketsPerLID_.view_device ());
1518  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1519 
1520  // MPI communication happens here.
1521  if (verbose) {
1522  std::ostringstream os;
1523  os << *prefix << "Call doPostsAndWaits"
1524  << endl;
1525  std::cerr << os.str ();
1526  }
1527 
1528  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1529 
1530  if (verbose) {
1531  std::ostringstream os;
1532  os << *prefix << "Count totalImportPackets" << std::endl;
1533  std::cerr << os.str ();
1534  }
1535  using the_dev_type = typename decltype (numImp_d)::device_type;
1536  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1537  }
1538 
1539  if (verbose) {
1540  std::ostringstream os;
1541  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1542  std::cerr << os.str ();
1543  }
1544  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1545  prefix.get (), canTryAliasing, CM);
1546  if (verbose) {
1547  std::ostringstream os;
1548  os << *prefix << "7.3. Second comm" << std::endl;
1549  std::cerr << os.str ();
1550  }
1551 
1552  // mfh 04 Feb 2019: Distributor expects the "num packets per
1553  // LID" arrays on host, so that it can issue MPI sends and
1554  // receives correctly.
1555  this->numImportPacketsPerLID_.sync_host ();
1556 
1557  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1558  // doReversePostsAndWaits currently want
1559  // numExportPacketsPerLID and numImportPacketsPerLID as
1560  // Teuchos::ArrayView, rather than as Kokkos::View.
1561  //
1562  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1563  // device. The above syncs might.
1564  auto numImportPacketsPerLID_av =
1565  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1566 
1567  // imports_ is for output only, so we don't need to sync it
1568  // before marking it as modified. However, in order to
1569  // prevent spurious debug-mode errors (e.g., "modified on
1570  // both device and host"), we first need to clear its
1571  // "modified" flags.
1572  this->imports_.clear_sync_state ();
1573 
1574  if (verbose) {
1575  std::ostringstream os;
1576  os << *prefix << "Comm on "
1577  << (commOnHost ? "host" : "device")
1578  << "; call doPostRecvs" << endl;
1579  std::cerr << os.str ();
1580  }
1581 
1582  if (commOnHost) {
1583  this->imports_.modify_host ();
1584  distributorActor_.doPostRecvs
1585  (distributorPlan,
1586  this->imports_.view_host (),
1587  numImportPacketsPerLID_av);
1588  }
1589  else { // pack on device
1590  this->imports_.modify_device ();
1591  distributorActor_.doPostRecvs
1592  (distributorPlan,
1593  this->imports_.view_device (),
1594  numImportPacketsPerLID_av);
1595  }
1596  }
1597  else { // constant number of packets per LID
1598  if (verbose) {
1599  std::ostringstream os;
1600  os << *prefix << "7.1. Const # packets per LID: " << endl
1601  << *prefix << " "
1602  << dualViewStatusToString (this->exports_, "exports_")
1603  << endl
1604  << *prefix << " "
1605  << dualViewStatusToString (this->exports_, "imports_")
1606  << endl;
1607  std::cerr << os.str ();
1608  }
1609  // imports_ is for output only, so we don't need to sync it
1610  // before marking it as modified. However, in order to
1611  // prevent spurious debug-mode errors (e.g., "modified on
1612  // both device and host"), we first need to clear its
1613  // "modified" flags.
1614  this->imports_.clear_sync_state ();
1615 
1616  if (verbose) {
1617  std::ostringstream os;
1618  os << *prefix << "7.2. Comm on "
1619  << (commOnHost ? "host" : "device")
1620  << "; call doPostRecvs" << endl;
1621  std::cerr << os.str ();
1622  }
1623  if (commOnHost) {
1624  this->imports_.modify_host ();
1625  distributorActor_.doPostRecvs
1626  (distributorPlan,
1627  constantNumPackets,
1628  this->imports_.view_host ());
1629  }
1630  else { // pack on device
1631  this->imports_.modify_device ();
1632  distributorActor_.doPostRecvs
1633  (distributorPlan,
1634  constantNumPackets,
1635  this->imports_.view_device ());
1636  } // commOnHost
1637  } // constant or variable num packets per LID
1638  }
1639 
1640  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1641  void
1642  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1643  doPostSends(const Details::DistributorPlan& distributorPlan,
1644  size_t constantNumPackets,
1645  bool commOnHost,
1646  std::shared_ptr<std::string> prefix)
1647  {
1649  using Kokkos::Compat::create_const_view;
1650  using std::endl;
1651  using Details::ProfilingRegion;
1652 
1653  const char funcNameHost[] = "Tpetra::DistObject::doPostSends[Host]";
1654  const char funcNameDevice[] = "Tpetra::DistObject::doPostSends[Device]";
1655  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1656  ProfilingRegion region_dps (funcName);
1657 
1658  const bool verbose = Details::Behavior::verbose("DistObject");
1659  if (verbose) {
1660  std::ostringstream os;
1661  os << *prefix << "Comm on "
1662  << (commOnHost ? "host" : "device")
1663  << "; call doPostSends" << endl;
1664  std::cerr << os.str ();
1665  }
1666 
1667  if (constantNumPackets == 0) { // variable num packets per LID
1668  // mfh 04 Feb 2019: Distributor expects the "num packets per
1669  // LID" arrays on host, so that it can issue MPI sends and
1670  // receives correctly.
1671  this->numExportPacketsPerLID_.sync_host ();
1672  this->numImportPacketsPerLID_.sync_host ();
1673 
1674  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1675  // doReversePostsAndWaits currently want
1676  // numExportPacketsPerLID and numImportPacketsPerLID as
1677  // Teuchos::ArrayView, rather than as Kokkos::View.
1678  //
1679  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1680  // device. The above syncs might.
1681  auto numExportPacketsPerLID_av =
1682  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1683  auto numImportPacketsPerLID_av =
1684  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1685 
1686  if (commOnHost) {
1687  distributorActor_.doPostSends
1688  (distributorPlan,
1689  create_const_view (this->exports_.view_host ()),
1690  numExportPacketsPerLID_av,
1691  this->imports_.view_host (),
1692  numImportPacketsPerLID_av);
1693  }
1694  else { // pack on device
1695  // We need to guarantee that packAndPrepare is done before we send.
1696  Kokkos::fence("DistObject::doPostSends-1"); // for UVM
1697  distributorActor_.doPostSends
1698  (distributorPlan,
1699  create_const_view (this->exports_.view_device ()),
1700  numExportPacketsPerLID_av,
1701  this->imports_.view_device (),
1702  numImportPacketsPerLID_av);
1703  }
1704  }
1705  else { // constant number of packets per LID
1706  if (commOnHost) {
1707  distributorActor_.doPostSends
1708  (distributorPlan,
1709  create_const_view (this->exports_.view_host ()),
1710  constantNumPackets,
1711  this->imports_.view_host ());
1712  }
1713  else { // pack on device
1714  // We need to guarantee that packAndPrepare is done before we send.
1715  Kokkos::fence("DistObject::doPostSends-2"); // for UVM
1716  distributorActor_.doPostSends
1717  (distributorPlan,
1718  create_const_view (this->exports_.view_device ()),
1719  constantNumPackets,
1720  this->imports_.view_device ());
1721  } // commOnHost
1722  } // constant or variable num packets per LID
1723  }
1724 
1725  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1726  void
1727  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1728  doPackAndPrepare(const SrcDistObject& src,
1729  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1730  size_t& constantNumPackets,
1731  const execution_space &space)
1732  {
1733  using Details::ProfilingRegion;
1734  using std::endl;
1735  const bool debug = Details::Behavior::debug("DistObject");
1736 
1737  ProfilingRegion region_pp
1738  ("Tpetra::DistObject::doPackAndPrepare");
1739 
1740  // Ask the source to pack data. Also ask it whether there are
1741  // a constant number of packets per element
1742  // (constantNumPackets is an output argument). If there are,
1743  // constantNumPackets will come back nonzero. Otherwise, the
1744  // source will fill the numExportPacketsPerLID_ array.
1745 
1746  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1747  // Alternately, make packAndPrepare take a "commOnHost"
1748  // argument to tell it where to leave the data?
1749  //
1750  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1751  // the freedom to pack and unpack either on host or device.
1752  // We should prefer sync'ing only on demand. Thus, we can
1753  // answer the above question: packAndPrepare should not
1754  // take a commOnHost argument, and doTransferNew should sync
1755  // where needed, if needed.
1756  if (debug) {
1757  std::ostringstream lclErrStrm;
1758  bool lclSuccess = false;
1759  try {
1760  this->packAndPrepare (src, exportLIDs, this->exports_,
1761  this->numExportPacketsPerLID_,
1762  constantNumPackets, space);
1763  lclSuccess = true;
1764  }
1765  catch (std::exception& e) {
1766  lclErrStrm << "packAndPrepare threw an exception: "
1767  << endl << e.what();
1768  }
1769  catch (...) {
1770  lclErrStrm << "packAndPrepare threw an exception "
1771  "not a subclass of std::exception.";
1772  }
1773  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1774  "threw an exception in packAndPrepare on "
1775  "one or more processes in the DistObject's communicator.";
1776  auto comm = getMap()->getComm();
1777  Details::checkGlobalError(std::cerr, lclSuccess,
1778  lclErrStrm.str().c_str(),
1779  gblErrMsgHeader, *comm);
1780  }
1781  else {
1782  this->packAndPrepare (src, exportLIDs, this->exports_,
1783  this->numExportPacketsPerLID_,
1784  constantNumPackets, space);
1785  }
1786  }
1787 
1788  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1789  void
1790  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1791  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1792  size_t constantNumPackets,
1793  CombineMode CM,
1794  const execution_space &space)
1795  {
1796  using Details::ProfilingRegion;
1797  using std::endl;
1798  const bool debug = Details::Behavior::debug("DistObject");
1799 
1800  ProfilingRegion region_uc
1801  ("Tpetra::DistObject::doUnpackAndCombine");
1802 
1803  if (debug) {
1804  std::ostringstream lclErrStrm;
1805  bool lclSuccess = false;
1806  try {
1807  this->unpackAndCombine (remoteLIDs, this->imports_,
1808  this->numImportPacketsPerLID_,
1809  constantNumPackets, CM, space);
1810  lclSuccess = true;
1811  }
1812  catch (std::exception& e) {
1813  lclErrStrm << "doUnpackAndCombine threw an exception: "
1814  << endl << e.what();
1815  }
1816  catch (...) {
1817  lclErrStrm << "doUnpackAndCombine threw an exception "
1818  "not a subclass of std::exception.";
1819  }
1820  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1821  "threw an exception in unpackAndCombine on "
1822  "one or more processes in the DistObject's communicator.";
1823  auto comm = getMap()->getComm();
1824  Details::checkGlobalError(std::cerr, lclSuccess,
1825  lclErrStrm.str().c_str(),
1826  gblErrMsgHeader, *comm);
1827  }
1828  else {
1829  this->unpackAndCombine (remoteLIDs, this->imports_,
1830  this->numImportPacketsPerLID_,
1831  constantNumPackets, CM, space);
1832  }
1833  }
1834 
1835  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1836  void
1839  (const SrcDistObject&,
1840  const size_t,
1841  const Kokkos::DualView<
1842  const local_ordinal_type*,
1844  const Kokkos::DualView<
1845  const local_ordinal_type*,
1847  const CombineMode CM)
1848  {}
1849 
1850 // clang-format on
1851 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1853  const SrcDistObject &source, const size_t numSameIDs,
1854  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1855  &permuteToLIDs,
1856  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1857  &permuteFromLIDs,
1858  const CombineMode CM, const execution_space &space) {
1859  /*
1860  This is called if the derived class doesn't know how to pack and prepare in
1861  an arbitrary execution space instance, but it was asked to anyway.
1862  Provide a safe illusion by actually doing the work in the default instance,
1863  and syncing the default instance with the provided instance.
1864  The caller expects
1865  1. any work in the provided instance to complete before this.
1866  2. This to complete before any following work in the provided instance.
1867  */
1868 
1869  space.fence(); // // TODO: Tpetra::Details::Spaces::exec_space_wait
1870  copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1871  CM); // default instance
1872  execution_space().fence(); // TODO:
1873  // Tpetra::Details::Spaces::exec_space_wait
1874 }
1875 // clang-format off
1876 
1877 
1878  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1879  void
1882  (const SrcDistObject&,
1883  const Kokkos::DualView<
1884  const local_ordinal_type*,
1886  Kokkos::DualView<
1887  packet_type*,
1889  Kokkos::DualView<
1890  size_t*,
1892  size_t&)
1893  {}
1894 
1895 // clang-format on
1896 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1898  const SrcDistObject &source,
1899  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1900  &exportLIDs,
1901  Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1902  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1903  size_t &constantNumPackets, const execution_space &space) {
1904  /*
1905  This is called if the derived class doesn't know how to pack and prepare in
1906  an arbitrary execution space instance, but it was asked to anyway.
1907  Provide a safe illusion by actually doing the work in the default instance,
1908  and syncing the default instance with the provided instance.
1909 
1910  The caller expects
1911  1. any work in the provided instance to complete before this.
1912  2. This to complete before any following work in the provided instance.
1913  */
1914 
1915  // wait for any work from prior operations in the provided instance to
1916  // complete
1917  space.fence(); // TODO: Details::Spaces::exec_space_wait
1918 
1919  // pack and prepare in the default instance.
1920  packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1921  constantNumPackets); // default instance
1922 
1923  // wait for the default instance to complete before returning, so any
1924  // following work inserted into the provided instance will be done after this
1925  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1926 }
1927 // clang-format off
1928 
1929  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1930  void
1933  (const Kokkos::DualView<
1934  const local_ordinal_type*,
1935  buffer_device_type>& /* importLIDs */,
1936  Kokkos::DualView<
1937  packet_type*,
1938  buffer_device_type> /* imports */,
1939  Kokkos::DualView<
1940  size_t*,
1941  buffer_device_type> /* numPacketsPerLID */,
1942  const size_t /* constantNumPackets */,
1943  const CombineMode /* combineMode */)
1944  {}
1945 
1946 // clang-format on
1947 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1949  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1950  &importLIDs,
1951  Kokkos::DualView<packet_type *, buffer_device_type> imports,
1952  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1953  const size_t constantNumPackets, const CombineMode combineMode,
1954  const execution_space &space) {
1955  // Wait for any work in the provided space to complete
1956  space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1957  // space);
1958  unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1959  combineMode); // default instance
1960  // wait for unpack to finish in the default instance, since the caller
1961  // may be expecting sequential semantics in the `space` instance
1962  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1963  // execution_space());
1964 }
1965 // clang-format off
1966 
1967 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1969  std::ostream &os) const {
1970  using std::endl;
1971  using Teuchos::FancyOStream;
1972  using Teuchos::getFancyOStream;
1973  using Teuchos::RCP;
1974  using Teuchos::rcpFromRef;
1975 
1976  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1977  this->describe(*out, Teuchos::VERB_DEFAULT);
1978 }
1979 
1980 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1981 std::unique_ptr<std::string>
1983  const char className[], const char methodName[]) const {
1984  auto map = this->getMap();
1985  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1986  return Details::createPrefix(comm.getRawPtr(), className, methodName);
1987 }
1988 
1989 template <class DistObjectType>
1991  Teuchos::RCP<DistObjectType> &input,
1992  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1993  typename DistObjectType::global_ordinal_type,
1994  typename DistObjectType::node_type>> &newMap) {
1995  input->removeEmptyProcessesInPlace(newMap);
1996  if (newMap.is_null()) { // my process is excluded
1997  input = Teuchos::null;
1998  }
1999 }
2000 
2001 template <class DistObjectType>
2002 void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
2003  auto newMap = input->getMap()->removeEmptyProcesses();
2004  removeEmptyProcessesInPlace<DistObjectType>(input, newMap);
2005 }
2006 
// Explicit instantiation macro for general DistObject.
// Expands to an explicit instantiation of
// DistObject<SCALAR, LO, GO, NODE>.  The expansion already ends with a
// semicolon.
#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
  template class DistObject<SCALAR, LO, GO, NODE>;

// Explicit instantiation macro for DistObject<char, ...>.
// The "SLGN" stuff above doesn't work for Packet=char, so this variant
// fixes the packet type to char and takes only LO, GO and NODE.
#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
  template class DistObject<char, LO, GO, NODE>;
2015 
2016 } // namespace Tpetra
2017 
2018 #endif // TPETRA_DISTOBJECT_DEF_HPP
2019 // clang-format on
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
const Details::DistributorPlan & getPlan() const
Get this Distributor's DistributorPlan.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
typename::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
static bool debug()
Whether Tpetra is in debug mode.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
typename device_type::execution_space execution_space
The Kokkos execution space.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.