Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 // clang-format off
41 #ifndef TPETRA_DISTOBJECT_DEF_HPP
42 #define TPETRA_DISTOBJECT_DEF_HPP
43 
51 
52 #include "Tpetra_Distributor.hpp"
55 #include "Tpetra_Details_checkGlobalError.hpp"
57 #include "Tpetra_Util.hpp" // Details::createPrefix
58 #include "Teuchos_CommHelpers.hpp"
59 #include "Teuchos_TypeNameTraits.hpp"
60 #include <typeinfo>
61 #include <memory>
62 #include <sstream>
63 
64 namespace Tpetra {
65 
66  namespace { // (anonymous)
67  template<class DeviceType, class IndexType = size_t>
68  struct SumFunctor {
69  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
70  viewToSum_ (viewToSum) {}
71  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
72  lclSum += viewToSum_(i);
73  }
74  Kokkos::View<const size_t*, DeviceType> viewToSum_;
75  };
76 
77  template<class DeviceType, class IndexType = size_t>
78  size_t
79  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
80  {
81  using Kokkos::parallel_reduce;
82  typedef DeviceType DT;
83  typedef typename DT::execution_space DES;
84  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
85 
86  const IndexType numOut = numImportPacketsPerLID.extent (0);
87  size_t totalImportPackets = 0;
88  parallel_reduce ("Count import packets",
89  range_type (0, numOut),
90  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
91  totalImportPackets);
92  return totalImportPackets;
93  }
94  } // namespace (anonymous)
95 
96 
97  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
99  DistObject (const Teuchos::RCP<const map_type>& map) :
100  map_ (map)
101  {
102 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
103  using Teuchos::RCP;
104  using Teuchos::Time;
105  using Teuchos::TimeMonitor;
106 
107  RCP<Time> doXferTimer =
108  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
109  if (doXferTimer.is_null ()) {
110  doXferTimer =
111  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
112  }
113  doXferTimer_ = doXferTimer;
114 
115  RCP<Time> copyAndPermuteTimer =
116  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
117  if (copyAndPermuteTimer.is_null ()) {
118  copyAndPermuteTimer =
119  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
120  }
121  copyAndPermuteTimer_ = copyAndPermuteTimer;
122 
123  RCP<Time> packAndPrepareTimer =
124  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
125  if (packAndPrepareTimer.is_null ()) {
126  packAndPrepareTimer =
127  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
128  }
129  packAndPrepareTimer_ = packAndPrepareTimer;
130 
131  RCP<Time> doPostsAndWaitsTimer =
132  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
133  if (doPostsAndWaitsTimer.is_null ()) {
134  doPostsAndWaitsTimer =
135  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
136  }
137  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
138 
139  RCP<Time> unpackAndCombineTimer =
140  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
141  if (unpackAndCombineTimer.is_null ()) {
142  unpackAndCombineTimer =
143  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
144  }
145  unpackAndCombineTimer_ = unpackAndCombineTimer;
146 #endif // HAVE_TPETRA_TRANSFER_TIMERS
147  }
148 
149  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
150  std::string
152  description () const
153  {
154  using Teuchos::TypeNameTraits;
155 
156  std::ostringstream os;
157  os << "\"Tpetra::DistObject\": {"
158  << "Packet: " << TypeNameTraits<packet_type>::name ()
159  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
160  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
161  << ", Node: " << TypeNameTraits<Node>::name ();
162  if (this->getObjectLabel () != "") {
163  os << "Label: \"" << this->getObjectLabel () << "\"";
164  }
165  os << "}";
166  return os.str ();
167  }
168 
169  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
170  void
172  describe (Teuchos::FancyOStream &out,
173  const Teuchos::EVerbosityLevel verbLevel) const
174  {
175  using Teuchos::rcpFromRef;
176  using Teuchos::TypeNameTraits;
177  using std::endl;
178  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
179  Teuchos::VERB_LOW : verbLevel;
180  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
181  const int myRank = comm.is_null () ? 0 : comm->getRank ();
182  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
183 
184  if (vl != Teuchos::VERB_NONE) {
185  Teuchos::OSTab tab0 (out);
186  if (myRank == 0) {
187  out << "\"Tpetra::DistObject\":" << endl;
188  }
189  Teuchos::OSTab tab1 (out);
190  if (myRank == 0) {
191  out << "Template parameters:" << endl;
192  {
193  Teuchos::OSTab tab2 (out);
194  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
195  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
196  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
197  << "Node: " << TypeNameTraits<node_type>::name () << endl;
198  }
199  if (this->getObjectLabel () != "") {
200  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
201  }
202  } // if myRank == 0
203 
204  // Describe the Map.
205  {
206  if (myRank == 0) {
207  out << "Map:" << endl;
208  }
209  Teuchos::OSTab tab2 (out);
210  map_->describe (out, vl);
211  }
212 
213  // At verbosity > VERB_LOW, each process prints something.
214  if (vl > Teuchos::VERB_LOW) {
215  for (int p = 0; p < numProcs; ++p) {
216  if (myRank == p) {
217  out << "Process " << myRank << ":" << endl;
218  Teuchos::OSTab tab2 (out);
219  out << "Export buffer size (in packets): "
220  << exports_.extent (0)
221  << endl
222  << "Import buffer size (in packets): "
223  << imports_.extent (0)
224  << endl;
225  }
226  if (! comm.is_null ()) {
227  comm->barrier (); // give output time to finish
228  comm->barrier ();
229  comm->barrier ();
230  }
231  } // for each process rank p
232  } // if vl > VERB_LOW
233  } // if vl != VERB_NONE
234  }
235 
236  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
237  void
239  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
240  {
241  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
242  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
243  }
244 
245  /* These are provided in base DistObject template
246  template<class DistObjectType>
247  void
248  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
249  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
250  typename DistObjectType::global_ordinal_type,
251  typename DistObjectType::node_type> >& newMap)
252  {
253  input->removeEmptyProcessesInPlace (newMap);
254  if (newMap.is_null ()) { // my process is excluded
255  input = Teuchos::null;
256  }
257  }
258 
259  template<class DistObjectType>
260  void
261  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
262  {
263  using Teuchos::RCP;
264  typedef typename DistObjectType::local_ordinal_type LO;
265  typedef typename DistObjectType::global_ordinal_type GO;
266  typedef typename DistObjectType::node_type NT;
267  typedef Map<LO, GO, NT> map_type;
268 
269  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
270  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
271  }
272  */
273 
274  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
275  void
277  doImport (const SrcDistObject& source,
279  const CombineMode CM,
280  const bool restrictedMode)
281  {
282  using Details::Behavior;
283  using std::endl;
284  const char modeString[] = "doImport (forward mode)";
285 
286  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
287  // output to std::cerr on every MPI process. This is unwise for
288  // runs with large numbers of MPI processes.
289  const bool verbose = Behavior::verbose("DistObject");
290  std::unique_ptr<std::string> prefix;
291  if (verbose) {
292  prefix = this->createPrefix("DistObject", modeString);
293  std::ostringstream os;
294  os << *prefix << "Start" << endl;
295  std::cerr << os.str ();
296  }
297  this->beginImport(source, importer, CM, restrictedMode);
298  this->endImport(source, importer, CM, restrictedMode);
299  if (verbose) {
300  std::ostringstream os;
301  os << *prefix << "Done" << endl;
302  std::cerr << os.str ();
303  }
304  }
305 
306  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
307  void
309  doExport (const SrcDistObject& source,
311  const CombineMode CM,
312  const bool restrictedMode)
313  {
314  using Details::Behavior;
315  using std::endl;
316  const char modeString[] = "doExport (forward mode)";
317 
318  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
319  // output to std::cerr on every MPI process. This is unwise for
320  // runs with large numbers of MPI processes.
321  const bool verbose = Behavior::verbose("DistObject");
322  std::unique_ptr<std::string> prefix;
323  if (verbose) {
324  prefix = this->createPrefix("DistObject", modeString);
325  std::ostringstream os;
326  os << *prefix << "Start" << endl;
327  std::cerr << os.str ();
328  }
329  this->beginExport(source, exporter, CM, restrictedMode);
330  this->endExport(source, exporter, CM, restrictedMode);
331  if (verbose) {
332  std::ostringstream os;
333  os << *prefix << "Done" << endl;
334  std::cerr << os.str ();
335  }
336  }
337 
338  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
339  void
341  doImport (const SrcDistObject& source,
343  const CombineMode CM,
344  const bool restrictedMode)
345  {
346  using Details::Behavior;
347  using std::endl;
348  const char modeString[] = "doImport (reverse mode)";
349 
350  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
351  // output to std::cerr on every MPI process. This is unwise for
352  // runs with large numbers of MPI processes.
353  const bool verbose = Behavior::verbose("DistObject");
354  std::unique_ptr<std::string> prefix;
355  if (verbose) {
356  prefix = this->createPrefix("DistObject", modeString);
357  std::ostringstream os;
358  os << *prefix << "Start" << endl;
359  std::cerr << os.str ();
360  }
361  this->beginImport(source, exporter, CM, restrictedMode);
362  this->endImport(source, exporter, CM, restrictedMode);
363  if (verbose) {
364  std::ostringstream os;
365  os << *prefix << "Done" << endl;
366  std::cerr << os.str ();
367  }
368  }
369 
370  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
371  void
373  doExport (const SrcDistObject& source,
375  const CombineMode CM,
376  const bool restrictedMode)
377  {
378  using Details::Behavior;
379  using std::endl;
380  const char modeString[] = "doExport (reverse mode)";
381 
382  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
383  // output to std::cerr on every MPI process. This is unwise for
384  // runs with large numbers of MPI processes.
385  const bool verbose = Behavior::verbose("DistObject");
386  std::unique_ptr<std::string> prefix;
387  if (verbose) {
388  prefix = this->createPrefix("DistObject", modeString);
389  std::ostringstream os;
390  os << *prefix << "Start" << endl;
391  std::cerr << os.str ();
392  }
393  this->beginExport(source, importer, CM, restrictedMode);
394  this->endExport(source, importer, CM, restrictedMode);
395  if (verbose) {
396  std::ostringstream os;
397  os << *prefix << "Done" << endl;
398  std::cerr << os.str ();
399  }
400  }
401 
402  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
403  void
405  beginImport(const SrcDistObject& source,
407  const CombineMode CM,
408  const bool restrictedMode)
409  {
410  using Details::Behavior;
411  using std::endl;
412  const char modeString[] = "beginImport (forward mode)";
413 
414  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
415  // output to std::cerr on every MPI process. This is unwise for
416  // runs with large numbers of MPI processes.
417  const bool verbose = Behavior::verbose("DistObject");
418  std::unique_ptr<std::string> prefix;
419  if (verbose) {
420  prefix = this->createPrefix("DistObject", modeString);
421  std::ostringstream os;
422  os << *prefix << "Start" << endl;
423  std::cerr << os.str ();
424  }
425  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
426  if (verbose) {
427  std::ostringstream os;
428  os << *prefix << "Done" << endl;
429  std::cerr << os.str ();
430  }
431  }
432 
433  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
434  void
435  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
436  beginExport(const SrcDistObject& source,
437  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
438  const CombineMode CM,
439  const bool restrictedMode)
440  {
441  using Details::Behavior;
442  using std::endl;
443  const char modeString[] = "beginExport (forward mode)";
444 
445  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
446  // output to std::cerr on every MPI process. This is unwise for
447  // runs with large numbers of MPI processes.
448  const bool verbose = Behavior::verbose("DistObject");
449  std::unique_ptr<std::string> prefix;
450  if (verbose) {
451  prefix = this->createPrefix("DistObject", modeString);
452  std::ostringstream os;
453  os << *prefix << "Start" << endl;
454  std::cerr << os.str ();
455  }
456  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
457  if (verbose) {
458  std::ostringstream os;
459  os << *prefix << "Done" << endl;
460  std::cerr << os.str ();
461  }
462  }
463 
464  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
465  void
466  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
467  beginImport(const SrcDistObject& source,
468  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
469  const CombineMode CM,
470  const bool restrictedMode)
471  {
472  using Details::Behavior;
473  using std::endl;
474  const char modeString[] = "beginImport (reverse mode)";
475 
476  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
477  // output to std::cerr on every MPI process. This is unwise for
478  // runs with large numbers of MPI processes.
479  const bool verbose = Behavior::verbose("DistObject");
480  std::unique_ptr<std::string> prefix;
481  if (verbose) {
482  prefix = this->createPrefix("DistObject", modeString);
483  std::ostringstream os;
484  os << *prefix << "Start" << endl;
485  std::cerr << os.str ();
486  }
487  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
488  if (verbose) {
489  std::ostringstream os;
490  os << *prefix << "Done" << endl;
491  std::cerr << os.str ();
492  }
493  }
494 
495  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
496  void
497  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
498  beginExport(const SrcDistObject& source,
499  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
500  const CombineMode CM,
501  const bool restrictedMode)
502  {
503  using Details::Behavior;
504  using std::endl;
505  const char modeString[] = "beginExport (reverse mode)";
506 
507  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
508  // output to std::cerr on every MPI process. This is unwise for
509  // runs with large numbers of MPI processes.
510  const bool verbose = Behavior::verbose("DistObject");
511  std::unique_ptr<std::string> prefix;
512  if (verbose) {
513  prefix = this->createPrefix("DistObject", modeString);
514  std::ostringstream os;
515  os << *prefix << "Start" << endl;
516  std::cerr << os.str ();
517  }
518  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
519  if (verbose) {
520  std::ostringstream os;
521  os << *prefix << "Done" << endl;
522  std::cerr << os.str ();
523  }
524  }
525 
526  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
527  void
528  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
529  endImport(const SrcDistObject& source,
530  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
531  const CombineMode CM,
532  const bool restrictedMode)
533  {
534  using Details::Behavior;
535  using std::endl;
536  const char modeString[] = "endImport (forward mode)";
537 
538  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
539  // output to std::cerr on every MPI process. This is unwise for
540  // runs with large numbers of MPI processes.
541  const bool verbose = Behavior::verbose("DistObject");
542  std::unique_ptr<std::string> prefix;
543  if (verbose) {
544  prefix = this->createPrefix("DistObject", modeString);
545  std::ostringstream os;
546  os << *prefix << "Start" << endl;
547  std::cerr << os.str ();
548  }
549  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
550  if (verbose) {
551  std::ostringstream os;
552  os << *prefix << "Done" << endl;
553  std::cerr << os.str ();
554  }
555  }
556 
557  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
558  void
559  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
560  endExport(const SrcDistObject& source,
561  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
562  const CombineMode CM,
563  const bool restrictedMode)
564  {
565  using Details::Behavior;
566  using std::endl;
567  const char modeString[] = "endExport (forward mode)";
568 
569  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
570  // output to std::cerr on every MPI process. This is unwise for
571  // runs with large numbers of MPI processes.
572  const bool verbose = Behavior::verbose("DistObject");
573  std::unique_ptr<std::string> prefix;
574  if (verbose) {
575  prefix = this->createPrefix("DistObject", modeString);
576  std::ostringstream os;
577  os << *prefix << "Start" << endl;
578  std::cerr << os.str ();
579  }
580  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
581  if (verbose) {
582  std::ostringstream os;
583  os << *prefix << "Done" << endl;
584  std::cerr << os.str ();
585  }
586  }
587 
588  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
589  void
590  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
591  endImport(const SrcDistObject& source,
592  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
593  const CombineMode CM,
594  const bool restrictedMode)
595  {
596  using Details::Behavior;
597  using std::endl;
598  const char modeString[] = "endImport (reverse mode)";
599 
600  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
601  // output to std::cerr on every MPI process. This is unwise for
602  // runs with large numbers of MPI processes.
603  const bool verbose = Behavior::verbose("DistObject");
604  std::unique_ptr<std::string> prefix;
605  if (verbose) {
606  prefix = this->createPrefix("DistObject", modeString);
607  std::ostringstream os;
608  os << *prefix << "Start" << endl;
609  std::cerr << os.str ();
610  }
611  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
612  if (verbose) {
613  std::ostringstream os;
614  os << *prefix << "Done" << endl;
615  std::cerr << os.str ();
616  }
617  }
618 
619  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
620  void
621  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
622  endExport(const SrcDistObject& source,
623  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
624  const CombineMode CM,
625  const bool restrictedMode)
626  {
627  using Details::Behavior;
628  using std::endl;
629  const char modeString[] = "endExport (reverse mode)";
630 
631  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
632  // output to std::cerr on every MPI process. This is unwise for
633  // runs with large numbers of MPI processes.
634  const bool verbose = Behavior::verbose("DistObject");
635  std::unique_ptr<std::string> prefix;
636  if (verbose) {
637  prefix = this->createPrefix("DistObject", modeString);
638  std::ostringstream os;
639  os << *prefix << "Start" << endl;
640  std::cerr << os.str ();
641  }
642  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
643  if (verbose) {
644  std::ostringstream os;
645  os << *prefix << "Done" << endl;
646  std::cerr << os.str ();
647  }
648  }
649 
// Returns whatever distributorActor_.isReady() reports.
// NOTE(review): the line(s) carrying the class qualifier and the member
// function's name are absent from this listing; presumably this is a
// "has the transfer arrived" query on DistObject -- confirm against the
// class declaration.
650  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
651  bool
654  return distributorActor_.isReady();
655  }
656 
657  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
658  bool
660  isDistributed () const {
661  return map_->isDistributed ();
662  }
663 
// Default implementation that always returns 0; per the inline comment,
// subclasses may override it.
// NOTE(review): the line(s) carrying the class qualifier and the member
// function's name are absent from this listing -- confirm the name
// against the class declaration.
664  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
665  size_t
668  return 0; // default implementation; subclasses may override
669  }
670 
// Complete transfer in one call: beginTransfer followed by endTransfer,
// both given exactly the same arguments.
// NOTE(review): the line(s) carrying the class qualifier, the function
// name, and the first parameter (used below as `src`) are absent from
// this listing; presumably this is DistObject::doTransfer -- confirm
// against the class declaration.
671  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
672  void
675  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
676  const char modeString[],
677  const ReverseOption revOp,
678  const CombineMode CM,
679  bool restrictedMode)
680  {
// Start the transfer, then finish it right away.
681  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
682  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
683  }
684 
// Reallocate the imports_ buffer for newSize entries, if needed.
//
// newSize: requested size, passed to reallocDualViewIfNeeded.
// verbose: if true, print a line to std::cerr before and after the call.
// prefix:  prepended to each verbose line; it is dereferenced whenever
//          verbose is true, so it must be non-null in that case.
// The last two parameters are unnamed and unused in this implementation.
//
// Returns the value returned by reallocDualViewIfNeeded.
// NOTE(review): the class-qualifier line and one line just before the
// reallocDualViewIfNeeded call are absent from this listing.
685  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
686  bool
688  reallocImportsIfNeeded (const size_t newSize,
689  const bool verbose,
690  const std::string* prefix,
691  const bool /*remoteLIDsContiguous*/,
692  const CombineMode /*CM*/)
693  {
694  if (verbose) {
695  std::ostringstream os;
696  os << *prefix << "Realloc (if needed) imports_ from "
697  << imports_.extent (0) << " to " << newSize << std::endl;
698  std::cerr << os.str ();
699  }
701  const bool reallocated =
702  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
703  if (verbose) {
704  std::ostringstream os;
705  os << *prefix << "Finished realloc'ing imports_" << std::endl;
706  std::cerr << os.str ();
707  }
708  return reallocated;
709  }
710 
// Resize numExportPacketsPerLID_ and numImportPacketsPerLID_ (via
// reallocDualViewIfNeeded) for the given numbers of export and import
// LIDs.
//
// numExportLIDs: size requested for numExportPacketsPerLID_.
// numImportLIDs: size requested for numImportPacketsPerLID_.
//
// Returns true if either call to reallocDualViewIfNeeded returned true.
// When Behavior::verbose("DistObject") is true, prints the status of
// both DualViews to std::cerr before and after.
// NOTE(review): the class-qualifier line and two lines after
// "using Details::Behavior;" are absent from this listing.
711  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
712  bool
714  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
715  const size_t numImportLIDs)
716  {
717  using Details::Behavior;
720  using std::endl;
721  // If an array is already allocated, and if it is at least
722  // tooBigFactor times bigger than it needs to be, free it and
723  // reallocate to the size we need, in order to save space.
724  // Otherwise, take subviews to reduce allocation size.
725  constexpr size_t tooBigFactor = 10;
726 
727  const bool verbose = Behavior::verbose("DistObject");
728  std::unique_ptr<std::string> prefix;
729  if (verbose) {
730  prefix = this->createPrefix("DistObject",
731  "reallocArraysForNumPacketsPerLid");
732  std::ostringstream os;
733  os << *prefix
734  << "numExportLIDs: " << numExportLIDs
735  << ", numImportLIDs: " << numImportLIDs
736  << endl;
737  os << *prefix << "DualView status before:" << endl
738  << *prefix
739  << dualViewStatusToString (this->numExportPacketsPerLID_,
740  "numExportPacketsPerLID_")
741  << endl
742  << *prefix
743  << dualViewStatusToString (this->numImportPacketsPerLID_,
744  "numImportPacketsPerLID_")
745  << endl;
746  std::cerr << os.str ();
747  }
748 
749  // Reallocate numExportPacketsPerLID_ if needed.
750  const bool firstReallocated =
751  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
752  numExportLIDs,
753  "numExportPacketsPerLID",
754  tooBigFactor,
755  true); // need fence before, if realloc'ing
756 
757  // If we reallocated above, then we fenced after that
758  // reallocation. This means that we don't need to fence again,
759  // before the next reallocation.
760  const bool needFenceBeforeNextAlloc = ! firstReallocated;
761  const bool secondReallocated =
762  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
763  numImportLIDs,
764  "numImportPacketsPerLID",
765  tooBigFactor,
766  needFenceBeforeNextAlloc);
767 
768  if (verbose) {
769  std::ostringstream os;
770  os << *prefix << "DualView status after:" << endl
771  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
772  "numExportPacketsPerLID_")
773  << endl
774  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
775  "numImportPacketsPerLID_")
776  << endl;
777  std::cerr << os.str ();
778  }
779 
// True if either of the two buffers was reallocated.
780  return firstReallocated || secondReallocated;
781  }
782 
783  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
784  void
787  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
788  const char modeString[],
789  const ReverseOption revOp,
790  const CombineMode CM,
791  bool restrictedMode)
792  {
793  using Details::Behavior;
797  using Kokkos::Compat::getArrayView;
798  using Kokkos::Compat::getConstArrayView;
799  using Kokkos::Compat::getKokkosViewDeepCopy;
800  using Kokkos::Compat::create_const_view;
801  using std::endl;
804 
805  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
806  const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
807  const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
808  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
809 
810  ProfilingRegion region_doTransfer(funcName);
811  const bool verbose = Behavior::verbose("DistObject");
812  std::shared_ptr<std::string> prefix;
813  if (verbose) {
814  std::ostringstream os;
815  prefix = this->createPrefix("DistObject", "doTransfer");
816  os << *prefix << "Source type: " << Teuchos::typeName(src)
817  << ", Target type: " << Teuchos::typeName(*this) << endl;
818  std::cerr << os.str();
819  }
820 
821  // "Restricted Mode" does two things:
822  // 1) Skips copyAndPermute
823  // 2) Allows the "target" Map of the transfer to be a subset of
824  // the Map of *this, in a "locallyFitted" sense.
825  //
826  // This cannot be used if #2 is not true, OR there are permutes.
827  // Source Maps still need to match
828 
829  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
830  // checks. These may communicate more.
831  const bool debug = Behavior::debug("DistObject");
832  if (debug) {
833  if (! restrictedMode && revOp == DoForward) {
834  const bool myMapSameAsTransferTgtMap =
835  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
836  TEUCHOS_TEST_FOR_EXCEPTION
837  (! myMapSameAsTransferTgtMap, std::invalid_argument,
838  "Tpetra::DistObject::" << modeString << ": For forward-mode "
839  "communication, the target DistObject's Map must be the same "
840  "(in the sense of Tpetra::Map::isSameAs) as the input "
841  "Export/Import object's target Map.");
842  }
843  else if (! restrictedMode && revOp == DoReverse) {
844  const bool myMapSameAsTransferSrcMap =
845  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
846  TEUCHOS_TEST_FOR_EXCEPTION
847  (! myMapSameAsTransferSrcMap, std::invalid_argument,
848  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
849  "communication, the target DistObject's Map must be the same "
850  "(in the sense of Tpetra::Map::isSameAs) as the input "
851  "Export/Import object's source Map.");
852  }
853  else if (restrictedMode && revOp == DoForward) {
854  const bool myMapLocallyFittedTransferTgtMap =
855  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
856  TEUCHOS_TEST_FOR_EXCEPTION
857  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
858  "Tpetra::DistObject::" << modeString << ": For forward-mode "
859  "communication using restricted mode, Export/Import object's "
860  "target Map must be locally fitted (in the sense of "
861  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
862  }
863  else { // if (restrictedMode && revOp == DoReverse)
864  const bool myMapLocallyFittedTransferSrcMap =
865  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
866  TEUCHOS_TEST_FOR_EXCEPTION
867  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
868  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
869  "communication using restricted mode, Export/Import object's "
870  "source Map must be locally fitted (in the sense of "
871  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
872  }
873 
874  // SrcDistObject need not even _have_ Maps. However, if the
875  // source object is a DistObject, it has a Map, and we may
876  // compare that Map with the Transfer's Maps.
877  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
878  if (srcDistObj != nullptr) {
879  if (revOp == DoForward) {
880  const bool srcMapSameAsImportSrcMap =
881  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
882  TEUCHOS_TEST_FOR_EXCEPTION
883  (! srcMapSameAsImportSrcMap, std::invalid_argument,
884  "Tpetra::DistObject::" << modeString << ": For forward-mode "
885  "communication, the source DistObject's Map must be the same "
886  "as the input Export/Import object's source Map.");
887  }
888  else { // revOp == DoReverse
889  const bool srcMapSameAsImportTgtMap =
890  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
891  TEUCHOS_TEST_FOR_EXCEPTION
892  (! srcMapSameAsImportTgtMap, std::invalid_argument,
893  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
894  "communication, the source DistObject's Map must be the same "
895  "as the input Export/Import object's target Map.");
896  }
897  }
898  }
899 
900  const size_t numSameIDs = transfer.getNumSameIDs ();
901  Distributor& distor = transfer.getDistributor ();
902  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
903 
904  TEUCHOS_TEST_FOR_EXCEPTION
905  (debug && restrictedMode &&
906  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
907  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
908  std::invalid_argument,
909  "Tpetra::DistObject::" << modeString << ": Transfer object "
910  "cannot have permutes in restricted mode.");
911 
912  // Do we need all communication buffers to live on host?
913  if (verbose) {
914  std::ostringstream os;
915  os << *prefix << "doTransfer: Use new interface; "
916  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
917  std::cerr << os.str ();
918  }
919 
920  using const_lo_dv_type =
921  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
922  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
923  transfer.getPermuteToLIDs_dv () :
924  transfer.getPermuteFromLIDs_dv ();
925  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
926  transfer.getPermuteFromLIDs_dv () :
927  transfer.getPermuteToLIDs_dv ();
928  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
929  transfer.getRemoteLIDs_dv () :
930  transfer.getExportLIDs_dv ();
931  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
932  transfer.getExportLIDs_dv () :
933  transfer.getRemoteLIDs_dv ();
934  const bool canTryAliasing = (revOp == DoForward) ?
935  transfer.areRemoteLIDsContiguous() :
936  transfer.areExportLIDsContiguous();
937  // const bool canTryAliasing = false;
938 
939  ProfilingRegion region_dTN(funcName);
940 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
941  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
942  // of Kokkos profiling.
943  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
944 #endif // HAVE_TPETRA_TRANSFER_TIMERS
945 
946  if (verbose) {
947  std::ostringstream os;
948  os << *prefix << "Input arguments:" << endl
949  << *prefix << " combineMode: " << combineModeToString (CM) << endl
950  << *prefix << " numSameIDs: " << numSameIDs << endl
951  << *prefix << " "
952  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
953  << *prefix << " "
954  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
955  << *prefix << " "
956  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
957  << *prefix << " "
958  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
959  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
960  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
961  std::cerr << os.str ();
962  }
963 
964  {
965  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
966  if (verbose) {
967  std::ostringstream os;
968  os << *prefix << "1. checkSizes" << endl;
969  std::cerr << os.str ();
970  }
971  const bool checkSizesResult = this->checkSizes (src);
972  TEUCHOS_TEST_FOR_EXCEPTION
973  (! checkSizesResult, std::invalid_argument,
974  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
975  "destination object is not a legal target for redistribution from the "
976  "source object. This probably means that they do not have the same "
977  "dimensions. For example, MultiVectors must have the same number of "
978  "rows and columns.");
979  }
980 
981  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
982  // that if CM == INSERT || CM == REPLACE, the target object could
983  // be write only. We don't optimize for that here.
984 
985  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
986  // There is at least one GID to copy or permute.
987  if (verbose) {
988  std::ostringstream os;
989  os << *prefix << "2. copyAndPermute" << endl;
990  std::cerr << os.str ();
991  }
992  ProfilingRegion region_cp
993  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
994 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
995  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
996  // of Kokkos profiling.
997  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
998 #endif // HAVE_TPETRA_TRANSFER_TIMERS
999 
1000  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
1001  // There is at least one GID to copy or permute.
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *prefix << "2. copyAndPermute" << endl;
1005  std::cerr << os.str ();
1006  }
1007  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
1008  permuteFromLIDs, CM);
1009  if (verbose) {
1010  std::ostringstream os;
1011  os << *prefix << "After copyAndPermute:" << endl
1012  << *prefix << " "
1013  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1014  << endl
1015  << *prefix << " "
1016  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1017  << endl;
1018  std::cerr << os.str ();
1019  }
1020  }
1021  }
1022 
1023  // The method may return zero even if the implementation actually
1024  // does have a constant number of packets per LID. However, if it
1025  // returns nonzero, we may use this information to avoid
1026  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1027  // will set this to its final value.
1028  //
1029  // We only need this if CM != ZERO, but it has to be lifted out of
1030  // that scope because there are multiple tests for CM != ZERO.
1031  size_t constantNumPackets = this->constantNumberOfPackets ();
1032  if (verbose) {
1033  std::ostringstream os;
1034  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1035  std::cerr << os.str ();
1036  }
1037 
1038  // We only need to pack communication buffers if the combine mode
1039  // is not ZERO. A "ZERO combine mode" means that the results are
1040  // the same as if we had received all zeros, and added them to the
1041  // existing values. That means we don't need to communicate.
1042  if (CM != ZERO) {
1043  if (constantNumPackets == 0) {
1044  if (verbose) {
1045  std::ostringstream os;
1046  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1047  << endl;
1048  std::cerr << os.str ();
1049  }
1050  // This only reallocates if necessary, that is, if the sizes
1051  // don't match.
1052  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1053  remoteLIDs.extent (0));
1054  }
1055 
1056  if (verbose) {
1057  std::ostringstream os;
1058  os << *prefix << "4. packAndPrepare: before, "
1059  << dualViewStatusToString (this->exports_, "exports_")
1060  << endl;
1061  std::cerr << os.str ();
1062  }
1063 
1064  doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1065  if (commOnHost) {
1066  this->exports_.sync_host();
1067  }
1068  else {
1069  this->exports_.sync_device();
1070  }
1071 
1072  if (verbose) {
1073  std::ostringstream os;
1074  os << *prefix << "5.1. After packAndPrepare, "
1075  << dualViewStatusToString (this->exports_, "exports_")
1076  << endl;
1077  std::cerr << os.str ();
1078  }
1079  } // if (CM != ZERO)
1080 
1081  // We only need to send data if the combine mode is not ZERO.
1082  if (CM != ZERO) {
1083  if (constantNumPackets != 0) {
1084  // There are a constant number of packets per element. We
1085  // already know (from the number of "remote" (incoming)
1086  // elements) how many incoming elements we expect, so we can
1087  // resize the buffer accordingly.
1088  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1089  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1090  }
1091 
1092  // Do we need to do communication (via doPostsAndWaits)?
1093  bool needCommunication = true;
1094 
1095  // This may be NULL. It will be used below.
1096  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1097 
1098  if (revOp == DoReverse && ! this->isDistributed ()) {
1099  needCommunication = false;
1100  }
1101  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1102  // is distributed requires a cast to DistObject. If it's not a
1103  // DistObject, then I'm not quite sure what to do. Perhaps it
1104  // would be more appropriate for SrcDistObject to have an
1105  // isDistributed() method. For now, I'll just assume that we
1106  // need to do communication unless the cast succeeds and the
1107  // source is not distributed.
1108  else if (revOp == DoForward && srcDistObj != NULL &&
1109  ! srcDistObj->isDistributed ()) {
1110  needCommunication = false;
1111  }
1112 
1113  if (! needCommunication) {
1114  if (verbose) {
1115  std::ostringstream os;
1116  os << *prefix << "Comm not needed; skipping" << endl;
1117  std::cerr << os.str ();
1118  }
1119  }
1120  else {
1121  ProfilingRegion region_dpw
1122  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1123 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1124  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1125  // favor of Kokkos profiling.
1126  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1127 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1128 
1129  if (verbose) {
1130  std::ostringstream os;
1131  os << *prefix << "7.0. "
1132  << (revOp == DoReverse ? "Reverse" : "Forward")
1133  << " mode" << endl;
1134  std::cerr << os.str ();
1135  }
1136 
1137  doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1138  } // if (needCommunication)
1139  } // if (CM != ZERO)
1140  }
1141 
1142  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1143  void
1145  endTransfer(const SrcDistObject& src,
1146  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1147  const char modeString[],
1148  const ReverseOption revOp,
1149  const CombineMode CM,
1150  bool restrictedMode)
1151  {
1152  using Details::Behavior;
1156  using Kokkos::Compat::getArrayView;
1157  using Kokkos::Compat::getConstArrayView;
1158  using Kokkos::Compat::getKokkosViewDeepCopy;
1159  using Kokkos::Compat::create_const_view;
1160  using std::endl;
1163 
1164  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1165  const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1166  const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1167  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1168  ProfilingRegion region_doTransfer(funcName);
1169  const bool verbose = Behavior::verbose("DistObject");
1170  std::shared_ptr<std::string> prefix;
1171  if (verbose) {
1172  std::ostringstream os;
1173  prefix = this->createPrefix("DistObject", "doTransfer");
1174  os << *prefix << "Source type: " << Teuchos::typeName(src)
1175  << ", Target type: " << Teuchos::typeName(*this) << endl;
1176  std::cerr << os.str();
1177  }
1178 
1179  // "Restricted Mode" does two things:
1180  // 1) Skips copyAndPermute
1181  // 2) Allows the "target" Map of the transfer to be a subset of
1182  // the Map of *this, in a "locallyFitted" sense.
1183  //
1184  // This cannot be used if #2 is not true, OR there are permutes.
1185  // Source Maps still need to match
1186 
1187  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1188  // checks. These may communicate more.
1189  const bool debug = Behavior::debug("DistObject");
1190  if (debug) {
1191  if (! restrictedMode && revOp == DoForward) {
1192  const bool myMapSameAsTransferTgtMap =
1193  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1194  TEUCHOS_TEST_FOR_EXCEPTION
1195  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1196  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1197  "communication, the target DistObject's Map must be the same "
1198  "(in the sense of Tpetra::Map::isSameAs) as the input "
1199  "Export/Import object's target Map.");
1200  }
1201  else if (! restrictedMode && revOp == DoReverse) {
1202  const bool myMapSameAsTransferSrcMap =
1203  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1204  TEUCHOS_TEST_FOR_EXCEPTION
1205  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1206  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1207  "communication, the target DistObject's Map must be the same "
1208  "(in the sense of Tpetra::Map::isSameAs) as the input "
1209  "Export/Import object's source Map.");
1210  }
1211  else if (restrictedMode && revOp == DoForward) {
1212  const bool myMapLocallyFittedTransferTgtMap =
1213  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1214  TEUCHOS_TEST_FOR_EXCEPTION
1215  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1216  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1217  "communication using restricted mode, Export/Import object's "
1218  "target Map must be locally fitted (in the sense of "
1219  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1220  }
1221  else { // if (restrictedMode && revOp == DoReverse)
1222  const bool myMapLocallyFittedTransferSrcMap =
1223  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1224  TEUCHOS_TEST_FOR_EXCEPTION
1225  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1226  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1227  "communication using restricted mode, Export/Import object's "
1228  "source Map must be locally fitted (in the sense of "
1229  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1230  }
1231 
1232  // SrcDistObject need not even _have_ Maps. However, if the
1233  // source object is a DistObject, it has a Map, and we may
1234  // compare that Map with the Transfer's Maps.
1235  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1236  if (srcDistObj != nullptr) {
1237  if (revOp == DoForward) {
1238  const bool srcMapSameAsImportSrcMap =
1239  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1240  TEUCHOS_TEST_FOR_EXCEPTION
1241  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1242  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1243  "communication, the source DistObject's Map must be the same "
1244  "as the input Export/Import object's source Map.");
1245  }
1246  else { // revOp == DoReverse
1247  const bool srcMapSameAsImportTgtMap =
1248  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1249  TEUCHOS_TEST_FOR_EXCEPTION
1250  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1251  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1252  "communication, the source DistObject's Map must be the same "
1253  "as the input Export/Import object's target Map.");
1254  }
1255  }
1256  }
1257 
1258  Distributor& distor = transfer.getDistributor ();
1259  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1260 
1261  TEUCHOS_TEST_FOR_EXCEPTION
1262  (debug && restrictedMode &&
1263  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1264  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1265  std::invalid_argument,
1266  "Tpetra::DistObject::" << modeString << ": Transfer object "
1267  "cannot have permutes in restricted mode.");
1268 
1269  // Do we need all communication buffers to live on host?
1270  if (verbose) {
1271  std::ostringstream os;
1272  os << *prefix << "doTransfer: Use new interface; "
1273  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1274  std::cerr << os.str ();
1275  }
1276 
1277  using const_lo_dv_type =
1278  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1279  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1280  transfer.getPermuteToLIDs_dv () :
1281  transfer.getPermuteFromLIDs_dv ();
1282  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1283  transfer.getPermuteFromLIDs_dv () :
1284  transfer.getPermuteToLIDs_dv ();
1285  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1286  transfer.getRemoteLIDs_dv () :
1287  transfer.getExportLIDs_dv ();
1288  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1289  transfer.getExportLIDs_dv () :
1290  transfer.getRemoteLIDs_dv ();
1291  const bool canTryAliasing = (revOp == DoForward) ?
1292  transfer.areRemoteLIDsContiguous() :
1293  transfer.areExportLIDsContiguous();
1294 
1295  size_t constantNumPackets = this->constantNumberOfPackets ();
1296 
1297  // We only need to send data if the combine mode is not ZERO.
1298  if (CM != ZERO) {
1299  if (constantNumPackets != 0) {
1300  // There are a constant number of packets per element. We
1301  // already know (from the number of "remote" (incoming)
1302  // elements) how many incoming elements we expect, so we can
1303  // resize the buffer accordingly.
1304  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1305  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1306  }
1307 
1308  // Do we need to do communication (via doPostsAndWaits)?
1309  bool needCommunication = true;
1310 
1311  // This may be NULL. It will be used below.
1312  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1313 
1314  if (revOp == DoReverse && ! this->isDistributed ()) {
1315  needCommunication = false;
1316  }
1317  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1318  // is distributed requires a cast to DistObject. If it's not a
1319  // DistObject, then I'm not quite sure what to do. Perhaps it
1320  // would be more appropriate for SrcDistObject to have an
1321  // isDistributed() method. For now, I'll just assume that we
1322  // need to do communication unless the cast succeeds and the
1323  // source is not distributed.
1324  else if (revOp == DoForward && srcDistObj != NULL &&
1325  ! srcDistObj->isDistributed ()) {
1326  needCommunication = false;
1327  }
1328 
1329  if (! needCommunication) {
1330  if (verbose) {
1331  std::ostringstream os;
1332  os << *prefix << "Comm not needed; skipping" << endl;
1333  std::cerr << os.str ();
1334  }
1335  }
1336  else {
1337  distributorActor_.doWaits(distributorPlan);
1338 
1339  if (verbose) {
1340  std::ostringstream os;
1341  os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1342  std::cerr << os.str ();
1343  }
1344  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1345  } // if (needCommunication)
1346  } // if (CM != ZERO)
1347 
1348  if (verbose) {
1349  std::ostringstream os;
1350  os << *prefix << "9. Done!" << endl;
1351  std::cerr << os.str ();
1352  }
1353 
1354  if (verbose) {
1355  std::ostringstream os;
1356  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1357  std::cerr << os.str ();
1358  }
1359  }
1360 
1361  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1362  void
1363  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1364  doPosts(const Details::DistributorPlan& distributorPlan,
1365  size_t constantNumPackets,
1366  bool commOnHost,
1367  std::shared_ptr<std::string> prefix,
1368  const bool canTryAliasing,
1369  const CombineMode CM)
1370  {
1373  using Kokkos::Compat::create_const_view;
1374  using std::endl;
1375 
1376  const bool verbose = Details::Behavior::verbose("DistObject");
1377 
1378  if (constantNumPackets == 0) { // variable num packets per LID
1379  if (verbose) {
1380  std::ostringstream os;
1381  os << *prefix << "7.1. Variable # packets / LID: first comm "
1382  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1383  << endl;
1384  std::cerr << os.str ();
1385  }
1386  size_t totalImportPackets = 0;
1387  if (commOnHost) {
1388  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1389  this->numExportPacketsPerLID_.sync_host ();
1390  }
1391  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1392  this->numImportPacketsPerLID_.sync_host ();
1393  }
1394  this->numImportPacketsPerLID_.modify_host (); // out arg
1395  auto numExp_h =
1396  create_const_view (this->numExportPacketsPerLID_.view_host ());
1397  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1398 
1399  // MPI communication happens here.
1400  if (verbose) {
1401  std::ostringstream os;
1402  os << *prefix << "Call doPostsAndWaits"
1403  << endl;
1404  std::cerr << os.str ();
1405  }
1406  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1407 
1408  if (verbose) {
1409  std::ostringstream os;
1410  os << *prefix << "Count totalImportPackets" << std::endl;
1411  std::cerr << os.str ();
1412  }
1413  using the_dev_type = typename decltype (numImp_h)::device_type;
1414  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1415  }
1416  else { // ! commOnHost
1417  this->numExportPacketsPerLID_.sync_device ();
1418  this->numImportPacketsPerLID_.sync_device ();
1419  this->numImportPacketsPerLID_.modify_device (); // out arg
1420  auto numExp_d = create_const_view
1421  (this->numExportPacketsPerLID_.view_device ());
1422  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1423 
1424  // MPI communication happens here.
1425  if (verbose) {
1426  std::ostringstream os;
1427  os << *prefix << "Call doPostsAndWaits"
1428  << endl;
1429  std::cerr << os.str ();
1430  }
1431 
1432  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1433 
1434  if (verbose) {
1435  std::ostringstream os;
1436  os << *prefix << "Count totalImportPackets" << std::endl;
1437  std::cerr << os.str ();
1438  }
1439  using the_dev_type = typename decltype (numImp_d)::device_type;
1440  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1441  }
1442 
1443  if (verbose) {
1444  std::ostringstream os;
1445  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1446  std::cerr << os.str ();
1447  }
1448  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1449  prefix.get (), canTryAliasing, CM);
1450  if (verbose) {
1451  std::ostringstream os;
1452  os << *prefix << "7.3. Second comm" << std::endl;
1453  std::cerr << os.str ();
1454  }
1455 
1456  // mfh 04 Feb 2019: Distributor expects the "num packets per
1457  // LID" arrays on host, so that it can issue MPI sends and
1458  // receives correctly.
1459  this->numExportPacketsPerLID_.sync_host ();
1460  this->numImportPacketsPerLID_.sync_host ();
1461 
1462  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1463  // doReversePostsAndWaits currently want
1464  // numExportPacketsPerLID and numImportPacketsPerLID as
1465  // Teuchos::ArrayView, rather than as Kokkos::View.
1466  //
1467  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1468  // device. The above syncs might.
1469  auto numExportPacketsPerLID_av =
1470  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1471  auto numImportPacketsPerLID_av =
1472  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1473 
1474  // imports_ is for output only, so we don't need to sync it
1475  // before marking it as modified. However, in order to
1476  // prevent spurious debug-mode errors (e.g., "modified on
1477  // both device and host"), we first need to clear its
1478  // "modified" flags.
1479  this->imports_.clear_sync_state ();
1480 
1481  if (verbose) {
1482  std::ostringstream os;
1483  os << *prefix << "Comm on "
1484  << (commOnHost ? "host" : "device")
1485  << "; call doPosts" << endl;
1486  std::cerr << os.str ();
1487  }
1488 
1489  if (commOnHost) {
1490  this->imports_.modify_host ();
1491  distributorActor_.doPosts
1492  (distributorPlan,
1493  create_const_view (this->exports_.view_host ()),
1494  numExportPacketsPerLID_av,
1495  this->imports_.view_host (),
1496  numImportPacketsPerLID_av);
1497  }
1498  else { // pack on device
1499  Kokkos::fence("DistObject::doPosts-1"); // for UVM
1500  this->imports_.modify_device ();
1501  distributorActor_.doPosts
1502  (distributorPlan,
1503  create_const_view (this->exports_.view_device ()),
1504  numExportPacketsPerLID_av,
1505  this->imports_.view_device (),
1506  numImportPacketsPerLID_av);
1507  }
1508  }
1509  else { // constant number of packets per LID
1510  if (verbose) {
1511  std::ostringstream os;
1512  os << *prefix << "7.1. Const # packets per LID: " << endl
1513  << *prefix << " "
1514  << dualViewStatusToString (this->exports_, "exports_")
1515  << endl
1516  << *prefix << " "
1517  << dualViewStatusToString (this->exports_, "imports_")
1518  << endl;
1519  std::cerr << os.str ();
1520  }
1521  // imports_ is for output only, so we don't need to sync it
1522  // before marking it as modified. However, in order to
1523  // prevent spurious debug-mode errors (e.g., "modified on
1524  // both device and host"), we first need to clear its
1525  // "modified" flags.
1526  this->imports_.clear_sync_state ();
1527 
1528  if (verbose) {
1529  std::ostringstream os;
1530  os << *prefix << "7.2. Comm on "
1531  << (commOnHost ? "host" : "device")
1532  << "; call doPosts" << endl;
1533  std::cerr << os.str ();
1534  }
1535  if (commOnHost) {
1536  this->imports_.modify_host ();
1537  distributorActor_.doPosts
1538  (distributorPlan,
1539  create_const_view (this->exports_.view_host ()),
1540  constantNumPackets,
1541  this->imports_.view_host ());
1542  }
1543  else { // pack on device
1544  Kokkos::fence("DistObject::doPosts-2"); // for UVM
1545  this->imports_.modify_device ();
1546  distributorActor_.doPosts
1547  (distributorPlan,
1548  create_const_view (this->exports_.view_device ()),
1549  constantNumPackets,
1550  this->imports_.view_device ());
1551  } // commOnHost
1552  } // constant or variable num packets per LID
1553  }
1554 
1555  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1556  void
1557  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1558  doPackAndPrepare(const SrcDistObject& src,
1559  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1560  size_t& constantNumPackets,
1561  const execution_space &space)
1562  {
1563  using Details::ProfilingRegion;
1564  using std::endl;
1565  const bool debug = Details::Behavior::debug("DistObject");
1566 
1567  ProfilingRegion region_pp
1568  ("Tpetra::DistObject::doPackAndPrepare");
1569 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1570  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1571  // favor of Kokkos profiling.
1572  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1573 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1574 
1575  // Ask the source to pack data. Also ask it whether there are
1576  // a constant number of packets per element
1577  // (constantNumPackets is an output argument). If there are,
1578  // constantNumPackets will come back nonzero. Otherwise, the
1579  // source will fill the numExportPacketsPerLID_ array.
1580 
1581  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1582  // Alternately, make packAndPrepare take a "commOnHost"
1583  // argument to tell it where to leave the data?
1584  //
1585  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1586  // the freedom to pack and unpack either on host or device.
1587  // We should prefer sync'ing only on demand. Thus, we can
1588  // answer the above question: packAndPrepare should not
1589  // take a commOnHost argument, and doTransferNew should sync
1590  // where needed, if needed.
1591  if (debug) {
1592  std::ostringstream lclErrStrm;
1593  bool lclSuccess = false;
1594  try {
1595  this->packAndPrepare (src, exportLIDs, this->exports_,
1596  this->numExportPacketsPerLID_,
1597  constantNumPackets, space);
1598  lclSuccess = true;
1599  }
1600  catch (std::exception& e) {
1601  lclErrStrm << "packAndPrepare threw an exception: "
1602  << endl << e.what();
1603  }
1604  catch (...) {
1605  lclErrStrm << "packAndPrepare threw an exception "
1606  "not a subclass of std::exception.";
1607  }
1608  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1609  "threw an exception in packAndPrepare on "
1610  "one or more processes in the DistObject's communicator.";
1611  auto comm = getMap()->getComm();
1612  Details::checkGlobalError(std::cerr, lclSuccess,
1613  lclErrStrm.str().c_str(),
1614  gblErrMsgHeader, *comm);
1615  }
1616  else {
1617  this->packAndPrepare (src, exportLIDs, this->exports_,
1618  this->numExportPacketsPerLID_,
1619  constantNumPackets, space);
1620  }
1621  }
1622 
1623  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1624  void
1625  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1626  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1627  size_t constantNumPackets,
1628  CombineMode CM,
1629  const execution_space &space)
1630  {
1631  using Details::ProfilingRegion;
1632  using std::endl;
1633  const bool debug = Details::Behavior::debug("DistObject");
1634 
1635  ProfilingRegion region_uc
1636  ("Tpetra::DistObject::doUnpackAndCombine");
1637 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1638  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1639  // favor of Kokkos profiling.
1640  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1641 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1642 
1643  if (debug) {
1644  std::ostringstream lclErrStrm;
1645  bool lclSuccess = false;
1646  try {
1647  this->unpackAndCombine (remoteLIDs, this->imports_,
1648  this->numImportPacketsPerLID_,
1649  constantNumPackets, CM, space);
1650  lclSuccess = true;
1651  }
1652  catch (std::exception& e) {
1653  lclErrStrm << "doUnpackAndCombine threw an exception: "
1654  << endl << e.what();
1655  }
1656  catch (...) {
1657  lclErrStrm << "doUnpackAndCombine threw an exception "
1658  "not a subclass of std::exception.";
1659  }
1660  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1661  "threw an exception in unpackAndCombine on "
1662  "one or more processes in the DistObject's communicator.";
1663  auto comm = getMap()->getComm();
1664  Details::checkGlobalError(std::cerr, lclSuccess,
1665  lclErrStrm.str().c_str(),
1666  gblErrMsgHeader, *comm);
1667  }
1668  else {
1669  this->unpackAndCombine (remoteLIDs, this->imports_,
1670  this->numImportPacketsPerLID_,
1671  constantNumPackets, CM, space);
1672  }
1673  }
1674 
1675  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1676  void
1679  (const SrcDistObject&,
1680  const size_t,
1681  const Kokkos::DualView<
1682  const local_ordinal_type*,
1684  const Kokkos::DualView<
1685  const local_ordinal_type*,
1687  const CombineMode CM)
1688  {}
1689 
1690 // clang-format on
1691 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1693  const SrcDistObject &source, const size_t numSameIDs,
1694  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1695  &permuteToLIDs,
1696  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1697  &permuteFromLIDs,
1698  const CombineMode CM, const execution_space &space) {
1699  /*
1700  This is called if the derived class doesn't know how to pack and prepare in
1701  an arbitrary execution space instance, but it was asked to anyway.
1702  Provide a safe illusion by actually doing the work in the default instance,
1703  and syncing the default instance with the provided instance.
1704  The caller expects
1705  1. any work in the provided instance to complete before this.
1706  2. This to complete before any following work in the provided instance.
1707  */
1708 
1709  space.fence(); // // TODO: Tpetra::Details::Spaces::exec_space_wait
1710  copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1711  CM); // default instance
1712  execution_space().fence(); // TODO:
1713  // Tpetra::Details::Spaces::exec_space_wait
1714 }
1715 // clang-format off
1716 
1717 
// Overload taking no execution-space argument. Judging by the parameter list
// this is the five-argument packAndPrepare; the declarator that names it is
// not visible in this listing -- TODO confirm.
// The body is empty: nothing is packed, and the trailing size_t& argument is
// left unmodified.
1718  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1719  void
1722  (const SrcDistObject&,
1723  const Kokkos::DualView<
1724  const local_ordinal_type*,
1726  Kokkos::DualView<
1727  packet_type*,
1729  Kokkos::DualView<
1730  size_t*,
1732  size_t&)
1733  {}
1734 
1735 // clang-format on
// Fallback packAndPrepare for a caller-supplied execution space instance:
// fences `space`, forwards to the packAndPrepare overload that takes no space
// argument, then fences a default-constructed execution_space.
1736 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1738  const SrcDistObject &source,
1739  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1740  &exportLIDs,
1741  Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1742  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1743  size_t &constantNumPackets, const execution_space &space) {
1744  /*
1745  This is called if the derived class doesn't know how to pack and prepare in
1746  an arbitrary execution space instance, but it was asked to anyway.
1747  Provide a safe illusion by actually doing the work in the default instance,
1748  and syncing the default instance with the provided instance.
1749 
1750  The caller expects
1751  1. any work in the provided instance to complete before this.
1752  2. This to complete before any following work in the provided instance.
1753  */
1754 
1755  // wait for any work from prior operations in the provided instance to
1756  // complete
1757  space.fence(); // TODO: Details::Spaces::exec_space_wait
1758 
1759  // pack and prepare in the default instance.
1760  packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1761  constantNumPackets); // default instance
1762 
1763  // wait for the default instance to complete before returning, so any
1764  // following work inserted into the provided instance will be done after this
1765  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1766 }
1767 // clang-format off
1768 
// Overload taking no execution-space argument. Judging by the commented-out
// parameter names this is the five-argument unpackAndCombine; the declarator
// that names it is not visible in this listing -- TODO confirm.
// The body is empty, so nothing is unpacked or combined.
1769  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1770  void
1773  (const Kokkos::DualView<
1774  const local_ordinal_type*,
1775  buffer_device_type>& /* importLIDs */,
1776  Kokkos::DualView<
1777  packet_type*,
1778  buffer_device_type> /* imports */,
1779  Kokkos::DualView<
1780  size_t*,
1781  buffer_device_type> /* numPacketsPerLID */,
1782  const size_t /* constantNumPackets */,
1783  const CombineMode /* combineMode */)
1784  {}
1785 
1786 // clang-format on
// Fallback unpackAndCombine for a caller-supplied execution space instance:
// fences `space`, forwards to the unpackAndCombine overload that takes no
// space argument, then fences a default-constructed execution_space.
1787 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1789  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1790  &importLIDs,
1791  Kokkos::DualView<packet_type *, buffer_device_type> imports,
1792  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1793  const size_t constantNumPackets, const CombineMode combineMode,
1794  const execution_space &space) {
1795  // Wait for any work in the provided space to complete
1796  space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1797  // space);
1798  unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1799  combineMode); // default instance
1800  // wait for unpack to finish in the default instance, since the caller
1801  // may be expecting sequential semantics in the `space` instance
1802  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1803  // execution_space());
1804 }
1805 // clang-format off
1806 
// Writes this object to `os`: wraps the stream in a Teuchos::FancyOStream and
// calls describe() on it with Teuchos::VERB_DEFAULT.
1807 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1809  std::ostream &os) const {
1810  using std::endl; // NOTE(review): not referenced anywhere in this body
1811  using Teuchos::FancyOStream;
1812  using Teuchos::getFancyOStream;
1813  using Teuchos::RCP;
1814  using Teuchos::rcpFromRef;
1815 
1816  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1817  this->describe(*out, Teuchos::VERB_DEFAULT);
1818 }
1819 
// Returns the string built by Details::createPrefix from this object's
// communicator, className and methodName.
// If getMap() returns a null Map, a null communicator is passed instead, so
// the Map is never dereferenced in that case.
1820 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1821 std::unique_ptr<std::string>
1823  const char className[], const char methodName[]) const {
1824  auto map = this->getMap();
1825  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1826  return Details::createPrefix(comm.getRawPtr(), className, methodName);
1827 }
1828 
// Calls input->removeEmptyProcessesInPlace(newMap), then resets `input` to
// null when `newMap` is null (i.e. the calling process is excluded).
// `input` is dereferenced unconditionally -- presumably callers must pass a
// non-null RCP; TODO confirm.
1829 template <class DistObjectType>
1831  Teuchos::RCP<DistObjectType> &input,
1832  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1833  typename DistObjectType::global_ordinal_type,
1834  typename DistObjectType::node_type>> &newMap) {
1835  input->removeEmptyProcessesInPlace(newMap);
1836  if (newMap.is_null()) { // my process is excluded
1837  input = Teuchos::null;
1838  }
1839 }
1840 
// Convenience overload: obtains the new Map by calling removeEmptyProcesses()
// on input's current Map, then delegates to the two-argument overload.
// `input` and its Map are dereferenced unconditionally -- presumably both must
// be non-null; TODO confirm.
1841 template <class DistObjectType>
1842 void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
1843  auto newMap = input->getMap()->removeEmptyProcesses();
1844  removeEmptyProcessesInPlace<DistObjectType>(input, newMap);
1845 }
1846 
1847 // Explicit instantiation macro for general DistObject.
// Expands to an explicit instantiation of DistObject<SCALAR, LO, GO, NODE>.
1848 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1849  template class DistObject<SCALAR, LO, GO, NODE>;
1850 
1851 // Explicit instantiation macro for DistObject<char, ...>.
1852 // The "SLGN" stuff above doesn't work for Packet=char.
// Expands to an explicit instantiation of DistObject<char, LO, GO, NODE>;
// unlike TPETRA_DISTOBJECT_INSTANT it takes no SCALAR argument.
1853 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1854  template class DistObject<char, LO, GO, NODE>;
1855 
1856 } // namespace Tpetra
1857 
1858 #endif // TPETRA_DISTOBJECT_DEF_HPP
1859 // clang-format on
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
const Details::DistributorPlan & getPlan() const
Get this Distributor's DistributorPlan.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
typename::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
static bool debug()
Whether Tpetra is in debug mode.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
typename device_type::execution_space execution_space
The Kokkos execution space.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.