Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DISTOBJECT_DEF_HPP
41 #define TPETRA_DISTOBJECT_DEF_HPP
42 
50 
51 #include "Tpetra_Distributor.hpp"
54 #include "Tpetra_Details_checkGlobalError.hpp"
56 #include "Tpetra_Util.hpp" // Details::createPrefix
57 #include "Teuchos_CommHelpers.hpp"
58 #include "Teuchos_TypeNameTraits.hpp"
59 #include <typeinfo>
60 #include <memory>
61 #include <sstream>
62 
63 namespace Tpetra {
64 
65  namespace { // (anonymous)
66  template<class DeviceType, class IndexType = size_t>
67  struct SumFunctor {
68  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69  viewToSum_ (viewToSum) {}
70  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
71  lclSum += viewToSum_(i);
72  }
73  Kokkos::View<const size_t*, DeviceType> viewToSum_;
74  };
75 
76  template<class DeviceType, class IndexType = size_t>
77  size_t
78  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
79  {
80  using Kokkos::parallel_reduce;
81  typedef DeviceType DT;
82  typedef typename DT::execution_space DES;
83  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
84 
85  const IndexType numOut = numImportPacketsPerLID.extent (0);
86  size_t totalImportPackets = 0;
87  parallel_reduce ("Count import packets",
88  range_type (0, numOut),
89  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
90  totalImportPackets);
91  return totalImportPackets;
92  }
93  } // namespace (anonymous)
94 
95 
96  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
98  DistObject (const Teuchos::RCP<const map_type>& map) :
99  map_ (map)
100  {
101 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
102  using Teuchos::RCP;
103  using Teuchos::Time;
104  using Teuchos::TimeMonitor;
105 
106  RCP<Time> doXferTimer =
107  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
108  if (doXferTimer.is_null ()) {
109  doXferTimer =
110  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
111  }
112  doXferTimer_ = doXferTimer;
113 
114  RCP<Time> copyAndPermuteTimer =
115  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
116  if (copyAndPermuteTimer.is_null ()) {
117  copyAndPermuteTimer =
118  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
119  }
120  copyAndPermuteTimer_ = copyAndPermuteTimer;
121 
122  RCP<Time> packAndPrepareTimer =
123  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
124  if (packAndPrepareTimer.is_null ()) {
125  packAndPrepareTimer =
126  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
127  }
128  packAndPrepareTimer_ = packAndPrepareTimer;
129 
130  RCP<Time> doPostsAndWaitsTimer =
131  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
132  if (doPostsAndWaitsTimer.is_null ()) {
133  doPostsAndWaitsTimer =
134  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
135  }
136  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
137 
138  RCP<Time> unpackAndCombineTimer =
139  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
140  if (unpackAndCombineTimer.is_null ()) {
141  unpackAndCombineTimer =
142  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
143  }
144  unpackAndCombineTimer_ = unpackAndCombineTimer;
145 #endif // HAVE_TPETRA_TRANSFER_TIMERS
146  }
147 
148  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
149  std::string
151  description () const
152  {
153  using Teuchos::TypeNameTraits;
154 
155  std::ostringstream os;
156  os << "\"Tpetra::DistObject\": {"
157  << "Packet: " << TypeNameTraits<packet_type>::name ()
158  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160  << ", Node: " << TypeNameTraits<Node>::name ();
161  if (this->getObjectLabel () != "") {
162  os << "Label: \"" << this->getObjectLabel () << "\"";
163  }
164  os << "}";
165  return os.str ();
166  }
167 
168  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
169  void
171  describe (Teuchos::FancyOStream &out,
172  const Teuchos::EVerbosityLevel verbLevel) const
173  {
174  using Teuchos::rcpFromRef;
175  using Teuchos::TypeNameTraits;
176  using std::endl;
177  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178  Teuchos::VERB_LOW : verbLevel;
179  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180  const int myRank = comm.is_null () ? 0 : comm->getRank ();
181  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
182 
183  if (vl != Teuchos::VERB_NONE) {
184  Teuchos::OSTab tab0 (out);
185  if (myRank == 0) {
186  out << "\"Tpetra::DistObject\":" << endl;
187  }
188  Teuchos::OSTab tab1 (out);
189  if (myRank == 0) {
190  out << "Template parameters:" << endl;
191  {
192  Teuchos::OSTab tab2 (out);
193  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
194  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196  << "Node: " << TypeNameTraits<node_type>::name () << endl;
197  }
198  if (this->getObjectLabel () != "") {
199  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
200  }
201  } // if myRank == 0
202 
203  // Describe the Map.
204  {
205  if (myRank == 0) {
206  out << "Map:" << endl;
207  }
208  Teuchos::OSTab tab2 (out);
209  map_->describe (out, vl);
210  }
211 
212  // At verbosity > VERB_LOW, each process prints something.
213  if (vl > Teuchos::VERB_LOW) {
214  for (int p = 0; p < numProcs; ++p) {
215  if (myRank == p) {
216  out << "Process " << myRank << ":" << endl;
217  Teuchos::OSTab tab2 (out);
218  out << "Export buffer size (in packets): "
219  << exports_.extent (0)
220  << endl
221  << "Import buffer size (in packets): "
222  << imports_.extent (0)
223  << endl;
224  }
225  if (! comm.is_null ()) {
226  comm->barrier (); // give output time to finish
227  comm->barrier ();
228  comm->barrier ();
229  }
230  } // for each process rank p
231  } // if vl > VERB_LOW
232  } // if vl != VERB_NONE
233  }
234 
235  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
236  void
238  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
239  {
240  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
241  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
242  }
243 
244  /* These are provided in base DistObject template
245  template<class DistObjectType>
246  void
247  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
248  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
249  typename DistObjectType::global_ordinal_type,
250  typename DistObjectType::node_type> >& newMap)
251  {
252  input->removeEmptyProcessesInPlace (newMap);
253  if (newMap.is_null ()) { // my process is excluded
254  input = Teuchos::null;
255  }
256  }
257 
258  template<class DistObjectType>
259  void
260  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
261  {
262  using Teuchos::RCP;
263  typedef typename DistObjectType::local_ordinal_type LO;
264  typedef typename DistObjectType::global_ordinal_type GO;
265  typedef typename DistObjectType::node_type NT;
266  typedef Map<LO, GO, NT> map_type;
267 
268  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
269  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
270  }
271  */
272 
273  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
274  void
276  doImport (const SrcDistObject& source,
278  const CombineMode CM,
279  const bool restrictedMode)
280  {
281  using Details::Behavior;
282  using std::endl;
283  const char modeString[] = "doImport (forward mode)";
284 
285  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
286  // output to std::cerr on every MPI process. This is unwise for
287  // runs with large numbers of MPI processes.
288  const bool verbose = Behavior::verbose("DistObject");
289  std::unique_ptr<std::string> prefix;
290  if (verbose) {
291  prefix = this->createPrefix("DistObject", modeString);
292  std::ostringstream os;
293  os << *prefix << "Start" << endl;
294  std::cerr << os.str ();
295  }
296  this->doTransfer (source, importer, modeString, DoForward, CM,
297  restrictedMode);
298  if (verbose) {
299  std::ostringstream os;
300  os << *prefix << "Done" << endl;
301  std::cerr << os.str ();
302  }
303  }
304 
305  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
306  void
308  doExport (const SrcDistObject& source,
310  const CombineMode CM,
311  const bool restrictedMode)
312  {
313  using Details::Behavior;
314  using std::endl;
315  const char modeString[] = "doExport (forward mode)";
316 
317  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
318  // output to std::cerr on every MPI process. This is unwise for
319  // runs with large numbers of MPI processes.
320  const bool verbose = Behavior::verbose("DistObject");
321  std::unique_ptr<std::string> prefix;
322  if (verbose) {
323  prefix = this->createPrefix("DistObject", modeString);
324  std::ostringstream os;
325  os << *prefix << "Start" << endl;
326  std::cerr << os.str ();
327  }
328  this->doTransfer (source, exporter, modeString, DoForward, CM,
329  restrictedMode);
330  if (verbose) {
331  std::ostringstream os;
332  os << *prefix << "Done" << endl;
333  std::cerr << os.str ();
334  }
335  }
336 
337  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338  void
340  doImport (const SrcDistObject& source,
342  const CombineMode CM,
343  const bool restrictedMode)
344  {
345  using Details::Behavior;
346  using std::endl;
347  const char modeString[] = "doImport (reverse mode)";
348 
349  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
350  // output to std::cerr on every MPI process. This is unwise for
351  // runs with large numbers of MPI processes.
352  const bool verbose = Behavior::verbose("DistObject");
353  std::unique_ptr<std::string> prefix;
354  if (verbose) {
355  prefix = this->createPrefix("DistObject", modeString);
356  std::ostringstream os;
357  os << *prefix << "Start" << endl;
358  std::cerr << os.str ();
359  }
360  this->doTransfer (source, exporter, modeString, DoReverse, CM,
361  restrictedMode);
362  if (verbose) {
363  std::ostringstream os;
364  os << *prefix << "Done" << endl;
365  std::cerr << os.str ();
366  }
367  }
368 
369  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
370  void
372  doExport (const SrcDistObject& source,
374  const CombineMode CM,
375  const bool restrictedMode)
376  {
377  using Details::Behavior;
378  using std::endl;
379  const char modeString[] = "doExport (reverse mode)";
380 
381  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
382  // output to std::cerr on every MPI process. This is unwise for
383  // runs with large numbers of MPI processes.
384  const bool verbose = Behavior::verbose("DistObject");
385  std::unique_ptr<std::string> prefix;
386  if (verbose) {
387  prefix = this->createPrefix("DistObject", modeString);
388  std::ostringstream os;
389  os << *prefix << "Start" << endl;
390  std::cerr << os.str ();
391  }
392  this->doTransfer (source, importer, modeString, DoReverse, CM,
393  restrictedMode);
394  if (verbose) {
395  std::ostringstream os;
396  os << *prefix << "Done" << endl;
397  std::cerr << os.str ();
398  }
399  }
400 
401  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
402  bool
404  isDistributed () const {
405  return map_->isDistributed ();
406  }
407 
408  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
409  size_t
412  return 0; // default implementation; subclasses may override
413  }
414 
415  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
416  void
419  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
420  const char modeString[],
421  const ReverseOption revOp,
422  const CombineMode CM,
423  bool restrictedMode)
424  {
425  using Details::Behavior;
428  using std::endl;
429  const char funcName[] = "Tpetra::DistObject::doTransfer";
430 
431  ProfilingRegion region_doTransfer(funcName);
432  const bool verbose = Behavior::verbose("DistObject");
433  std::unique_ptr<std::string> prefix;
434  if (verbose) {
435  std::ostringstream os;
436  prefix = this->createPrefix("DistObject", "doTransfer");
437  os << *prefix << "Source type: " << Teuchos::typeName(src)
438  << ", Target type: " << Teuchos::typeName(*this) << endl;
439  std::cerr << os.str();
440  }
441 
442  // "Restricted Mode" does two things:
443  // 1) Skips copyAndPermute
444  // 2) Allows the "target" Map of the transfer to be a subset of
445  // the Map of *this, in a "locallyFitted" sense.
446  //
447  // This cannot be used if #2 is not true, OR there are permutes.
448  // Source Maps still need to match
449 
450  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
451  // checks. These may communicate more.
452  const bool debug = Behavior::debug("DistObject");
453  if (debug) {
454  if (! restrictedMode && revOp == DoForward) {
455  const bool myMapSameAsTransferTgtMap =
456  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
457  TEUCHOS_TEST_FOR_EXCEPTION
458  (! myMapSameAsTransferTgtMap, std::invalid_argument,
459  "Tpetra::DistObject::" << modeString << ": For forward-mode "
460  "communication, the target DistObject's Map must be the same "
461  "(in the sense of Tpetra::Map::isSameAs) as the input "
462  "Export/Import object's target Map.");
463  }
464  else if (! restrictedMode && revOp == DoReverse) {
465  const bool myMapSameAsTransferSrcMap =
466  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
467  TEUCHOS_TEST_FOR_EXCEPTION
468  (! myMapSameAsTransferSrcMap, std::invalid_argument,
469  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
470  "communication, the target DistObject's Map must be the same "
471  "(in the sense of Tpetra::Map::isSameAs) as the input "
472  "Export/Import object's source Map.");
473  }
474  else if (restrictedMode && revOp == DoForward) {
475  const bool myMapLocallyFittedTransferTgtMap =
476  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
477  TEUCHOS_TEST_FOR_EXCEPTION
478  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
479  "Tpetra::DistObject::" << modeString << ": For forward-mode "
480  "communication using restricted mode, Export/Import object's "
481  "target Map must be locally fitted (in the sense of "
482  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
483  }
484  else { // if (restrictedMode && revOp == DoReverse) {
485  const bool myMapLocallyFittedTransferSrcMap =
486  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
487  TEUCHOS_TEST_FOR_EXCEPTION
488  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
489  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
490  "communication using restricted mode, Export/Import object's "
491  "source Map must be locally fitted (in the sense of "
492  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
493  }
494 
495  // SrcDistObject need not even _have_ Maps. However, if the
496  // source object is a DistObject, it has a Map, and we may
497  // compare that Map with the Transfer's Maps.
498  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
499  if (srcDistObj != nullptr) {
500  if (revOp == DoForward) {
501  const bool srcMapSameAsImportSrcMap =
502  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
503  TEUCHOS_TEST_FOR_EXCEPTION
504  (! srcMapSameAsImportSrcMap, std::invalid_argument,
505  "Tpetra::DistObject::" << modeString << ": For forward-mode "
506  "communication, the source DistObject's Map must be the same "
507  "as the input Export/Import object's source Map.");
508  }
509  else { // revOp == DoReverse
510  const bool srcMapSameAsImportTgtMap =
511  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
512  TEUCHOS_TEST_FOR_EXCEPTION
513  (! srcMapSameAsImportTgtMap, std::invalid_argument,
514  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
515  "communication, the source DistObject's Map must be the same "
516  "as the input Export/Import object's target Map.");
517  }
518  }
519  }
520 
521  const size_t numSameIDs = transfer.getNumSameIDs ();
522  Distributor& distor = transfer.getDistributor ();
523 
524  TEUCHOS_TEST_FOR_EXCEPTION
525  (debug && restrictedMode &&
526  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
527  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
528  std::invalid_argument,
529  "Tpetra::DistObject::" << modeString << ": Transfer object "
530  "cannot have permutes in restricted mode.");
531 
532  // Do we need all communication buffers to live on host?
533  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
534  if (verbose) {
535  std::ostringstream os;
536  os << *prefix << "doTransfer: Use new interface; "
537  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
538  std::cerr << os.str ();
539  }
540 
541  using const_lo_dv_type =
542  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
543  const_lo_dv_type permToLIDs = (revOp == DoForward) ?
544  transfer.getPermuteToLIDs_dv () :
545  transfer.getPermuteFromLIDs_dv ();
546  const_lo_dv_type permFromLIDs = (revOp == DoForward) ?
547  transfer.getPermuteFromLIDs_dv () :
548  transfer.getPermuteToLIDs_dv ();
549  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
550  transfer.getRemoteLIDs_dv () :
551  transfer.getExportLIDs_dv ();
552  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
553  transfer.getExportLIDs_dv () :
554  transfer.getRemoteLIDs_dv ();
555  doTransferNew (src, CM, numSameIDs, permToLIDs, permFromLIDs,
556  remoteLIDs, exportLIDs, distor, revOp, commOnHost,
557  restrictedMode);
558 
559  if (verbose) {
560  std::ostringstream os;
561  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
562  std::cerr << os.str ();
563  }
564  }
565 
566  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
567  bool
569  reallocImportsIfNeeded (const size_t newSize,
570  const bool verbose,
571  const std::string* prefix)
572  {
573  if (verbose) {
574  std::ostringstream os;
575  os << *prefix << "Realloc (if needed) imports_ from "
576  << imports_.extent (0) << " to " << newSize << std::endl;
577  std::cerr << os.str ();
578  }
580  const bool reallocated =
581  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
582  if (verbose) {
583  std::ostringstream os;
584  os << *prefix << "Finished realloc'ing imports_" << std::endl;
585  std::cerr << os.str ();
586  }
587  return reallocated;
588  }
589 
590  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
591  bool
593  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
594  const size_t numImportLIDs)
595  {
596  using Details::Behavior;
599  using std::endl;
600  // If an array is already allocated, and if is at least
601  // tooBigFactor times bigger than it needs to be, free it and
602  // reallocate to the size we need, in order to save space.
603  // Otherwise, take subviews to reduce allocation size.
604  constexpr size_t tooBigFactor = 10;
605 
606  const bool verbose = Behavior::verbose("DistObject");
607  std::unique_ptr<std::string> prefix;
608  if (verbose) {
609  prefix = this->createPrefix("DistObject",
610  "reallocArraysForNumPacketsPerLid");
611  std::ostringstream os;
612  os << *prefix
613  << "numExportLIDs: " << numExportLIDs
614  << ", numImportLIDs: " << numImportLIDs
615  << endl;
616  os << *prefix << "DualView status before:" << endl
617  << *prefix
618  << dualViewStatusToString (this->numExportPacketsPerLID_,
619  "numExportPacketsPerLID_")
620  << endl
621  << *prefix
622  << dualViewStatusToString (this->numImportPacketsPerLID_,
623  "numImportPacketsPerLID_")
624  << endl;
625  std::cerr << os.str ();
626  }
627 
628  // Reallocate numExportPacketsPerLID_ if needed.
629  const bool firstReallocated =
630  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
631  numExportLIDs,
632  "numExportPacketsPerLID",
633  tooBigFactor,
634  true); // need fence before, if realloc'ing
635 
636  // If we reallocated above, then we fenced after that
637  // reallocation. This means that we don't need to fence again,
638  // before the next reallocation.
639  const bool needFenceBeforeNextAlloc = ! firstReallocated;
640  const bool secondReallocated =
641  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
642  numImportLIDs,
643  "numImportPacketsPerLID",
644  tooBigFactor,
645  needFenceBeforeNextAlloc);
646 
647  if (verbose) {
648  std::ostringstream os;
649  os << *prefix << "DualView status after:" << endl
650  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
651  "numExportPacketsPerLID_")
652  << endl
653  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
654  "numImportPacketsPerLID_")
655  << endl;
656  std::cerr << os.str ();
657  }
658 
659  return firstReallocated || secondReallocated;
660  }
661 
662  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
663  void
666  const CombineMode CM,
667  const size_t numSameIDs,
668  const Kokkos::DualView<const local_ordinal_type*,
669  buffer_device_type>& permuteToLIDs,
670  const Kokkos::DualView<const local_ordinal_type*,
671  buffer_device_type>& permuteFromLIDs,
672  const Kokkos::DualView<const local_ordinal_type*,
673  buffer_device_type>& remoteLIDs,
674  const Kokkos::DualView<const local_ordinal_type*,
675  buffer_device_type>& exportLIDs,
676  Distributor& distor,
677  const ReverseOption revOp,
678  const bool commOnHost,
679  const bool restrictedMode)
680  {
681  using Details::Behavior;
685  using Kokkos::Compat::getArrayView;
686  using Kokkos::Compat::getConstArrayView;
687  using Kokkos::Compat::getKokkosViewDeepCopy;
688  using Kokkos::Compat::create_const_view;
689  using std::endl;
690  using DT = device_type;
691  using DES = typename DT::execution_space;
692  const char funcName[] = "Tpetra::DistObject::doTransferNew";
693 
694  ProfilingRegion region_dTN(funcName);
695 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
696  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
697  // of Kokkos profiling.
698  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
699 #endif // HAVE_TPETRA_TRANSFER_TIMERS
700 
701  const bool debug = Behavior::debug("DistObject");
702  const bool verbose = Behavior::verbose("DistObject");
703  // Prefix for verbose output. Use a pointer, so we don't pay for
704  // string construction unless needed. We set this below.
705  std::unique_ptr<std::string> prefix;
706  if (verbose) {
707  prefix = this->createPrefix("DistObject", "doTransferNew");
708  }
709 
710  if (verbose) {
711  std::ostringstream os;
712  os << *prefix << "Input arguments:" << endl
713  << *prefix << " combineMode: " << combineModeToString (CM) << endl
714  << *prefix << " numSameIDs: " << numSameIDs << endl
715  << *prefix << " "
716  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
717  << *prefix << " "
718  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
719  << *prefix << " "
720  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
721  << *prefix << " "
722  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
723  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
724  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
725  std::cerr << os.str ();
726  }
727 
728  {
729  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
730  if (verbose) {
731  std::ostringstream os;
732  os << *prefix << "1. checkSizes" << endl;
733  std::cerr << os.str ();
734  }
735  const bool checkSizesResult = this->checkSizes (src);
736  TEUCHOS_TEST_FOR_EXCEPTION
737  (! checkSizesResult, std::invalid_argument,
738  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
739  "destination object is not a legal target for redistribution from the "
740  "source object. This probably means that they do not have the same "
741  "dimensions. For example, MultiVectors must have the same number of "
742  "rows and columns.");
743  }
744 
745  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
746  // that if CM == INSERT || CM == REPLACE, the target object could
747  // be write only. We don't optimize for that here.
748 
749  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
750  // There is at least one GID to copy or permute.
751  if (verbose) {
752  std::ostringstream os;
753  os << *prefix << "2. copyAndPermute" << endl;
754  std::cerr << os.str ();
755  }
756  ProfilingRegion region_cp
757  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
758 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
759  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
760  // of Kokkos profiling.
761  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
762 #endif // HAVE_TPETRA_TRANSFER_TIMERS
763 
764  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
765  // There is at least one GID to copy or permute.
766  if (verbose) {
767  std::ostringstream os;
768  os << *prefix << "2. copyAndPermute" << endl;
769  std::cerr << os.str ();
770  }
771  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
772  permuteFromLIDs);
773  if (verbose) {
774  std::ostringstream os;
775  os << *prefix << "After copyAndPermute:" << endl
776  << *prefix << " "
777  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
778  << endl
779  << *prefix << " "
780  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
781  << endl;
782  std::cerr << os.str ();
783  }
784  }
785  }
786 
787  // The method may return zero even if the implementation actually
788  // does have a constant number of packets per LID. However, if it
789  // returns nonzero, we may use this information to avoid
790  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
791  // will set this to its final value.
792  //
793  // We only need this if CM != ZERO, but it has to be lifted out of
794  // that scope because there are multiple tests for CM != ZERO.
795  size_t constantNumPackets = this->constantNumberOfPackets ();
796  if (verbose) {
797  std::ostringstream os;
798  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
799  std::cerr << os.str ();
800  }
801 
802  // We only need to pack communication buffers if the combine mode
803  // is not ZERO. A "ZERO combine mode" means that the results are
804  // the same as if we had received all zeros, and added them to the
805  // existing values. That means we don't need to communicate.
806  if (CM != ZERO) {
807  if (constantNumPackets == 0) {
808  if (verbose) {
809  std::ostringstream os;
810  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
811  << endl;
812  std::cerr << os.str ();
813  }
814  // This only reallocates if necessary, that is, if the sizes
815  // don't match.
816  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
817  remoteLIDs.extent (0));
818  }
819 
820  if (verbose) {
821  std::ostringstream os;
822  os << *prefix << "4. packAndPrepare: before, "
823  << dualViewStatusToString (this->exports_, "exports_")
824  << endl;
825  std::cerr << os.str ();
826  }
827  {
828  ProfilingRegion region_pp
829  ("Tpetra::DistObject::doTransferNew::packAndPrepare");
830 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
831  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
832  // favor of Kokkos profiling.
833  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
834 #endif // HAVE_TPETRA_TRANSFER_TIMERS
835 
836  // Ask the source to pack data. Also ask it whether there are
837  // a constant number of packets per element
838  // (constantNumPackets is an output argument). If there are,
839  // constantNumPackets will come back nonzero. Otherwise, the
840  // source will fill the numExportPacketsPerLID_ array.
841 
842  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
843  // Alternately, make packAndPrepare take a "commOnHost"
844  // argument to tell it where to leave the data?
845  //
846  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
847  // the freedom to pack and unpack either on host or device.
848  // We should prefer sync'ing only on demand. Thus, we can
849  // answer the above question: packAndPrepare should not
850  // take a commOnHost argument, and doTransferNew should sync
851  // where needed, if needed.
852  this->packAndPrepare (src, exportLIDs, this->exports_,
853  this->numExportPacketsPerLID_,
854  constantNumPackets, distor);
855  if (commOnHost) {
856  if (this->exports_.need_sync_host ()) {
857  this->exports_.sync_host ();
858  }
859  }
860  else { // ! commOnHost
861  if (this->exports_.need_sync_device ()) {
862  this->exports_.sync_device ();
863  }
864  }
865  }
866  if (verbose) {
867  std::ostringstream os;
868  os << *prefix << "5.1. After packAndPrepare, "
869  << dualViewStatusToString (this->exports_, "exports_")
870  << endl;
871  std::cerr << os.str ();
872  }
873  } // if (CM != ZERO)
874 
875  // We only need to send data if the combine mode is not ZERO.
876  if (CM != ZERO) {
877  if (constantNumPackets != 0) {
878  // There are a constant number of packets per element. We
879  // already know (from the number of "remote" (incoming)
880  // elements) how many incoming elements we expect, so we can
881  // resize the buffer accordingly.
882  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
883  reallocImportsIfNeeded (rbufLen, verbose, prefix.get ());
884  }
885 
886  // Do we need to do communication (via doPostsAndWaits)?
887  bool needCommunication = true;
888 
889  // This may be NULL. It will be used below.
890  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
891 
892  if (revOp == DoReverse && ! this->isDistributed ()) {
893  needCommunication = false;
894  }
895  // FIXME (mfh 30 Jun 2013): Checking whether the source object
896  // is distributed requires a cast to DistObject. If it's not a
897  // DistObject, then I'm not quite sure what to do. Perhaps it
898  // would be more appropriate for SrcDistObject to have an
899  // isDistributed() method. For now, I'll just assume that we
900  // need to do communication unless the cast succeeds and the
901  // source is not distributed.
902  else if (revOp == DoForward && srcDistObj != NULL &&
903  ! srcDistObj->isDistributed ()) {
904  needCommunication = false;
905  }
906 
907  if (! needCommunication) {
908  if (verbose) {
909  std::ostringstream os;
910  os << *prefix << "Comm not needed; skipping" << endl;
911  std::cerr << os.str ();
912  }
913  }
914  else {
915  ProfilingRegion region_dpw
916  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
917 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
918  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
919  // favor of Kokkos profiling.
920  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
921 #endif // HAVE_TPETRA_TRANSFER_TIMERS
922 
923  if (verbose) {
924  std::ostringstream os;
925  os << *prefix << "7.0. "
926  << (revOp == DoReverse ? "Reverse" : "Forward")
927  << " mode" << endl;
928  std::cerr << os.str ();
929  }
930 
931  if (constantNumPackets == 0) { // variable num packets per LID
932  if (verbose) {
933  std::ostringstream os;
934  os << *prefix << "7.1. Variable # packets / LID: first comm "
935  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
936  << endl;
937  std::cerr << os.str ();
938  }
939  size_t totalImportPackets = 0;
940  if (commOnHost) {
941  if (this->numExportPacketsPerLID_.need_sync_host ()) {
942  this->numExportPacketsPerLID_.sync_host ();
943  }
944  if (this->numImportPacketsPerLID_.need_sync_host ()) {
945  this->numImportPacketsPerLID_.sync_host ();
946  }
947  this->numImportPacketsPerLID_.modify_host (); // out arg
948  auto numExp_h =
949  create_const_view (this->numExportPacketsPerLID_.view_host ());
950  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
951 
952  // MPI communication happens here.
953  if (verbose) {
954  std::ostringstream os;
955  os << *prefix << "Call do"
956  << (revOp == DoReverse ? "Reverse" : "") << "PostsAndWaits"
957  << endl;
958  std::cerr << os.str ();
959  }
960  if (revOp == DoReverse) {
961  distor.doReversePostsAndWaits (numExp_h, 1, numImp_h);
962  }
963  else {
964  distor.doPostsAndWaits (numExp_h, 1, numImp_h);
965  }
966 
967  if (verbose) {
968  std::ostringstream os;
969  os << *prefix << "Count totalImportPackets" << std::endl;
970  std::cerr << os.str ();
971  }
972  using the_dev_type = typename decltype (numImp_h)::device_type;
973  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
974  }
975  else { // ! commOnHost
976  if (this->numExportPacketsPerLID_.need_sync_device ()) {
977  this->numExportPacketsPerLID_.sync_device ();
978  }
979  if (this->numImportPacketsPerLID_.need_sync_device ()) {
980  this->numImportPacketsPerLID_.sync_device ();
981  }
982  this->numImportPacketsPerLID_.modify_device (); // out arg
983  auto numExp_d = create_const_view
984  (this->numExportPacketsPerLID_.view_device ());
985  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
986 
987  // MPI communication happens here.
988  if (verbose) {
989  std::ostringstream os;
990  os << *prefix << "Call do"
991  << (revOp == DoReverse ? "Reverse" : "") << "PostsAndWaits"
992  << endl;
993  std::cerr << os.str ();
994  }
995  if (revOp == DoReverse) {
996  distor.doReversePostsAndWaits (numExp_d, 1, numImp_d);
997  }
998  else {
999  distor.doPostsAndWaits (numExp_d, 1, numImp_d);
1000  }
1001 
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *prefix << "Count totalImportPackets" << std::endl;
1005  std::cerr << os.str ();
1006  }
1007  using the_dev_type = typename decltype (numImp_d)::device_type;
1008  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1009  }
1010 
1011  if (verbose) {
1012  std::ostringstream os;
1013  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1014  std::cerr << os.str ();
1015  }
1016  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1017  prefix.get ());
1018  if (verbose) {
1019  std::ostringstream os;
1020  os << *prefix << "7.3. Second comm" << std::endl;
1021  std::cerr << os.str ();
1022  }
1023 
1024  // mfh 04 Feb 2019: Distributor expects the "num packets per
1025  // LID" arrays on host, so that it can issue MPI sends and
1026  // receives correctly.
1027  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1028  this->numExportPacketsPerLID_.sync_host ();
1029  }
1030  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1031  this->numImportPacketsPerLID_.sync_host ();
1032  }
1033 
1034  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1035  // doReversePostsAndWaits currently want
1036  // numExportPacketsPerLID and numImportPacketsPerLID as
1037  // Teuchos::ArrayView, rather than as Kokkos::View.
1038  //
1039  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1040  // device. The above syncs might.
1041  auto numExportPacketsPerLID_av =
1042  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1043  auto numImportPacketsPerLID_av =
1044  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1045 
1046  // imports_ is for output only, so we don't need to sync it
1047  // before marking it as modified. However, in order to
1048  // prevent spurious debug-mode errors (e.g., "modified on
1049  // both device and host"), we first need to clear its
1050  // "modified" flags.
1051  this->imports_.clear_sync_state ();
1052 
1053  if (verbose) {
1054  std::ostringstream os;
1055  os << *prefix << "Comm on "
1056  << (commOnHost ? "host" : "device")
1057  << "; call do" << (revOp == DoReverse ? "Reverse" : "")
1058  << "PostsAndWaits" << endl;
1059  std::cerr << os.str ();
1060  }
1061 
1062  if (commOnHost) {
1063  this->imports_.modify_host ();
1064  if (revOp == DoReverse) {
1065  distor.doReversePostsAndWaits
1066  (create_const_view (this->exports_.view_host ()),
1067  numExportPacketsPerLID_av,
1068  this->imports_.view_host (),
1069  numImportPacketsPerLID_av);
1070  }
1071  else {
1072  distor.doPostsAndWaits
1073  (create_const_view (this->exports_.view_host ()),
1074  numExportPacketsPerLID_av,
1075  this->imports_.view_host (),
1076  numImportPacketsPerLID_av);
1077  }
1078  }
1079  else { // pack on device
1080  Kokkos::fence(); // for UVM
1081  this->imports_.modify_device ();
1082  if (revOp == DoReverse) {
1083  distor.doReversePostsAndWaits
1084  (create_const_view (this->exports_.view_device ()),
1085  numExportPacketsPerLID_av,
1086  this->imports_.view_device (),
1087  numImportPacketsPerLID_av);
1088  }
1089  else {
1090  distor.doPostsAndWaits
1091  (create_const_view (this->exports_.view_device ()),
1092  numExportPacketsPerLID_av,
1093  this->imports_.view_device (),
1094  numImportPacketsPerLID_av);
1095  }
1096  }
1097  }
1098  else { // constant number of packets per LID
1099  if (verbose) {
1100  std::ostringstream os;
1101  os << *prefix << "7.1. Const # packets per LID: " << endl
1102  << *prefix << " "
1103  << dualViewStatusToString (this->exports_, "exports_")
1104  << endl
1105  << *prefix << " "
1106  << dualViewStatusToString (this->exports_, "imports_")
1107  << endl;
1108  std::cerr << os.str ();
1109  }
1110  // imports_ is for output only, so we don't need to sync it
1111  // before marking it as modified. However, in order to
1112  // prevent spurious debug-mode errors (e.g., "modified on
1113  // both device and host"), we first need to clear its
1114  // "modified" flags.
1115  this->imports_.clear_sync_state ();
1116 
1117  if (verbose) {
1118  std::ostringstream os;
1119  os << *prefix << "7.2. Comm on "
1120  << (commOnHost ? "host" : "device")
1121  << "; call do" << (revOp == DoReverse ? "Reverse" : "")
1122  << "PostsAndWaits" << endl;
1123  std::cerr << os.str ();
1124  }
1125  if (commOnHost) {
1126  this->imports_.modify_host ();
1127  if (revOp == DoReverse) {
1128  distor.doReversePostsAndWaits
1129  (create_const_view (this->exports_.view_host ()),
1130  constantNumPackets,
1131  this->imports_.view_host ());
1132  }
1133  else {
1134  distor.doPostsAndWaits
1135  (create_const_view (this->exports_.view_host ()),
1136  constantNumPackets,
1137  this->imports_.view_host ());
1138  }
1139  }
1140  else { // pack on device
1141  Kokkos::fence(); // for UVM
1142  this->imports_.modify_device ();
1143  if (revOp == DoReverse) {
1144  distor.doReversePostsAndWaits
1145  (create_const_view (this->exports_.view_device ()),
1146  constantNumPackets,
1147  this->imports_.view_device ());
1148  }
1149  else {
1150  distor.doPostsAndWaits
1151  (create_const_view (this->exports_.view_device ()),
1152  constantNumPackets,
1153  this->imports_.view_device ());
1154  }
1155  } // commOnHost
1156  } // constant or variable num packets per LID
1157 
1158  if (verbose) {
1159  std::ostringstream os;
1160  os << *prefix << "8. unpackAndCombine" << endl;
1161  std::cerr << os.str ();
1162  }
1163  ProfilingRegion region_uc
1164  ("Tpetra::DistObject::doTransferNew::unpackAndCombine");
1165 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1166  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1167  // favor of Kokkos profiling.
1168  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1169 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1170 
1171  if (debug) {
1172  std::ostringstream lclErrStrm;
1173  bool lclSuccess = false;
1174  try {
1175  this->unpackAndCombine (remoteLIDs, this->imports_,
1176  this->numImportPacketsPerLID_,
1177  constantNumPackets, distor, CM);
1178  lclSuccess = true;
1179  }
1180  catch (std::exception& e) {
1181  lclErrStrm << "unpackAndCombine threw an exception: "
1182  << endl << e.what();
1183  }
1184  catch (...) {
1185  lclErrStrm << "unpackAndCombine threw an exception "
1186  "not a subclass of std::exception.";
1187  }
1188  const char gblErrMsgHeader[] = "Tpetra::DistObject::"
1189  "doTransferNew threw an exception in unpackAndCombine on "
1190  "one or more processes in the DistObject's communicator.";
1191  auto comm = getMap()->getComm();
1192  Details::checkGlobalError(std::cerr, lclSuccess,
1193  lclErrStrm.str().c_str(),
1194  gblErrMsgHeader, *comm);
1195  }
1196  else {
1197  this->unpackAndCombine (remoteLIDs, this->imports_,
1198  this->numImportPacketsPerLID_,
1199  constantNumPackets, distor, CM);
1200  }
1201  } // if (needCommunication)
1202  } // if (CM != ZERO)
1203 
1204  if (verbose) {
1205  std::ostringstream os;
1206  os << *prefix << "9. Done!" << endl;
1207  std::cerr << os.str ();
1208  }
1209  }
1210 
1211 
// Default (no-op) implementation: every parameter is unnamed and the body is
// empty, so calling this does nothing.
//
// Visible parameters, in order: a const SrcDistObject&, a const size_t, and
// two const Kokkos::DualView objects over const local_ordinal_type*.
//
// NOTE(review): the listing lines that carry the qualified function name
// (1214-1215) and the tail of each DualView parameter (1220, 1223) are
// absent from this listing. The parameter list matches
// DistObject::copyAndPermute -- confirm against the original header.
1212  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1213  void
1216  (const SrcDistObject&,
1217  const size_t,
1218  const Kokkos::DualView<
1219  const local_ordinal_type*,
1221  const Kokkos::DualView<
1222  const local_ordinal_type*,
1224  {}
1225 
// Default (no-op) implementation: every parameter is unnamed and the body is
// empty. In particular, the size_t& argument is never written here, and none
// of the DualView arguments are touched.
//
// Visible parameters, in order: a const SrcDistObject&, a const DualView over
// const local_ordinal_type*, a DualView over packet_type*, a DualView over
// size_t*, a size_t&, and a Distributor&.
//
// NOTE(review): the listing lines that carry the qualified function name
// (1228-1229) and the tail of each DualView parameter (1233, 1236, 1239) are
// absent from this listing. The parameter list matches
// DistObject::packAndPrepare -- confirm against the original header.
1226  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1227  void
1230  (const SrcDistObject&,
1231  const Kokkos::DualView<
1232  const local_ordinal_type*,
1234  Kokkos::DualView<
1235  packet_type*,
1237  Kokkos::DualView<
1238  size_t*,
1240  size_t&,
1241  Distributor&)
1242  {}
1243 
// Default (no-op) implementation: the parameter names are commented out and
// the body is empty, so none of the arguments are read or modified.
//
// Parameters (names taken from the inline comments below):
//   importLIDs         const DualView over const local_ordinal_type*
//   imports            DualView over packet_type* (passed by value)
//   numPacketsPerLID   DualView over size_t* (passed by value)
//   constantNumPackets const size_t
//   distor             Distributor&
//   combineMode        const CombineMode
//
// NOTE(review): the listing lines that carry the qualified function name
// (1246-1247) are absent from this listing. The parameter list matches
// DistObject::unpackAndCombine -- confirm against the original header.
1244  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1245  void
1248  (const Kokkos::DualView<
1249  const local_ordinal_type*,
1250  buffer_device_type>& /* importLIDs */,
1251  Kokkos::DualView<
1252  packet_type*,
1253  buffer_device_type> /* imports */,
1254  Kokkos::DualView<
1255  size_t*,
1256  buffer_device_type> /* numPacketsPerLID */,
1257  const size_t /* constantNumPackets */,
1258  Distributor& /* distor */,
1259  const CombineMode /* combineMode */)
1260  {}
1261 
1262 
// Print this object to the output stream `os`. The stream is wrapped in a
// Teuchos::FancyOStream and handed to this->describe() with verbosity
// Teuchos::VERB_DEFAULT; nothing else is written by this method itself.
//
// NOTE(review): listing line 1265 (presumably the class-qualifier part of
// the declarator) is absent from this listing -- confirm against the
// original header.
1263  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1264  void
1266  print (std::ostream& os) const
1267  {
1268  using Teuchos::FancyOStream;
1269  using Teuchos::getFancyOStream;
1270  using Teuchos::RCP;
1271  using Teuchos::rcpFromRef;
// std::endl is brought into scope here but is not referenced below.
1272  using std::endl;
1273 
// Adapt the caller's std::ostream to the FancyOStream that describe() is
// called with.
1274  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1275  this->describe (*out, Teuchos::VERB_DEFAULT);
1276  }
1277 
// Create a prefix string from a class name and a method name by forwarding
// to Details::createPrefix together with this object's communicator.
//
// className   name of the class, passed through unchanged.
// methodName  name of the method, passed through unchanged.
// Returns the std::unique_ptr<std::string> produced by
// Details::createPrefix.
//
// NOTE(review): listing line 1280 (presumably the class-qualifier part of
// the declarator) is absent from this listing -- confirm against the
// original header.
1278  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1279  std::unique_ptr<std::string>
1281  createPrefix(const char className[],
1282  const char methodName[]) const
1283  {
1284  auto map = this->getMap();
// When the Map is null, use a null communicator handle instead of
// dereferencing the Map.
1285  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1286  return Details::createPrefix(
1287  comm.getRawPtr(), className, methodName);
1288  }
1289 
// Nonmember helper: calls input->removeEmptyProcessesInPlace(newMap), then
// sets `input` to Teuchos::null when `newMap` is null.
//
// input   [in/out] handle to the object. It is dereferenced unconditionally,
//                  before newMap is tested (NOTE(review): presumably callers
//                  must pass a non-null handle -- confirm).
// newMap  [in]     Map forwarded to the object's own
//                  removeEmptyProcessesInPlace; a null value is treated as
//                  "the calling process is excluded".
//
// NOTE(review): listing line 1292 (the function-name line that opens the
// parameter list) is absent from this listing; the one-argument overload
// calls this function as removeEmptyProcessesInPlace<DistObjectType>.
1290  template<class DistObjectType>
1291  void
1293  Teuchos::RCP<DistObjectType>& input,
1294  const Teuchos::RCP<const Map<
1295  typename DistObjectType::local_ordinal_type,
1296  typename DistObjectType::global_ordinal_type,
1297  typename DistObjectType::node_type>>& newMap)
1298  {
1299  input->removeEmptyProcessesInPlace (newMap);
1300  if (newMap.is_null ()) { // my process is excluded
1301  input = Teuchos::null;
1302  }
1303  }
1304 
// Convenience overload: obtains a new Map by calling
// removeEmptyProcesses() on the object's current Map, then delegates to the
// two-argument removeEmptyProcessesInPlace with that Map.
//
// input  [in/out] handle to the object. Both `input` and the result of
//                 input->getMap() are dereferenced without a null check
//                 (NOTE(review): presumably both must be non-null -- confirm
//                 against callers).
1305  template<class DistObjectType>
1306  void
1307  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1308  {
1309  auto newMap = input->getMap ()->removeEmptyProcesses ();
1310  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
1311  }
1312 
1313 // Explicit instantiation macro for general DistObject.
// Expands to an explicit instantiation of
// DistObject<SCALAR, LO, GO, NODE>. The expansion already ends in a
// semicolon, so no trailing semicolon is needed at the point of use.
1314 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1315  template class DistObject< SCALAR , LO , GO , NODE >;
1316 
1317 // Explicit instantiation macro for DistObject<char, ...>.
1318 // The "SLGN" stuff above doesn't work for Packet=char.
// Same expansion as above with the first template argument fixed to char;
// it likewise ends in a semicolon.
1319 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1320  template class DistObject< char , LO , GO , NODE >;
1321 
1322 } // namespace Tpetra
1323 
1324 #endif // TPETRA_DISTOBJECT_DEF_HPP
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
virtual void doTransferNew(const SrcDistObject &src, const CombineMode CM, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &remoteLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Distributor &distor, const ReverseOption revOp, const bool commOnHost, const bool restrictedMode)
Implementation detail of doTransfer.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
typename Node::device_type device_type
The Kokkos Device type.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
typename ::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, Distributor &distor)
Pack data and metadata for communication (sends).
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix)
Reallocate imports_ if needed.
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs)
Perform copies and permutations that are local to the calling (MPI) process.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode)
Perform any unpacking and combining after communication.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.