Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Export_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_EXPORT_DEF_HPP
11 #define TPETRA_EXPORT_DEF_HPP
12 
13 #include "Tpetra_Distributor.hpp"
14 #include "Tpetra_Map.hpp"
15 #include "Tpetra_ImportExportData.hpp"
16 #include "Tpetra_Util.hpp"
17 #include "Tpetra_Import.hpp"
18 #include "Tpetra_Details_DualViewUtil.hpp"
20 #include "Teuchos_as.hpp"
21 #include "Teuchos_Array.hpp"
22 #include "Teuchos_FancyOStream.hpp"
23 #include "Teuchos_ParameterList.hpp"
24 #include <memory>
25 
26 namespace Tpetra {
27 
28 template <class LocalOrdinal, class GlobalOrdinal, class Node>
30  Export(const Teuchos::RCP<const map_type>& source,
31  const Teuchos::RCP<const map_type>& target,
32  const Teuchos::RCP<Teuchos::FancyOStream>& out,
33  const Teuchos::RCP<Teuchos::ParameterList>& plist)
34  : base_type(source, target, out, plist, "Export") {
35  using std::endl;
36  using Teuchos::rcp;
37  using ::Tpetra::Details::ProfilingRegion;
38  ProfilingRegion regionExport("Tpetra::Export::Export");
39 
40  if (this->verbose()) {
41  std::ostringstream os;
42  const int myRank = source->getComm()->getRank();
43  os << myRank << ": Export ctor" << endl;
44  this->verboseOutputStream() << os.str();
45  }
46  Teuchos::Array<GlobalOrdinal> exportGIDs;
47  setupSamePermuteExport(exportGIDs);
48  if (source->isDistributed()) {
49  setupRemote(exportGIDs);
50  }
51 
52  TEUCHOS_ASSERT(!this->TransferData_->permuteFromLIDs_.need_sync_device());
53  TEUCHOS_ASSERT(!this->TransferData_->permuteFromLIDs_.need_sync_host());
54  TEUCHOS_ASSERT(!this->TransferData_->permuteToLIDs_.need_sync_device());
55  TEUCHOS_ASSERT(!this->TransferData_->permuteToLIDs_.need_sync_host());
56  TEUCHOS_ASSERT(!this->TransferData_->remoteLIDs_.need_sync_device());
57  TEUCHOS_ASSERT(!this->TransferData_->remoteLIDs_.need_sync_host());
58  TEUCHOS_ASSERT(!this->TransferData_->exportLIDs_.need_sync_device());
59  TEUCHOS_ASSERT(!this->TransferData_->exportLIDs_.need_sync_host());
60 
61  this->detectRemoteExportLIDsContiguous();
62 
63  if (this->verbose()) {
64  std::ostringstream os;
65  const int myRank = source->getComm()->getRank();
66  os << myRank << ": Export ctor: done" << endl;
67  this->verboseOutputStream() << os.str();
68  }
69 }
70 
71 template <class LocalOrdinal, class GlobalOrdinal, class Node>
73  Export(const Teuchos::RCP<const map_type>& source,
74  const Teuchos::RCP<const map_type>& target)
75  : Export(source, target, Teuchos::null, Teuchos::null) {}
76 
77 template <class LocalOrdinal, class GlobalOrdinal, class Node>
79  Export(const Teuchos::RCP<const map_type>& source,
80  const Teuchos::RCP<const map_type>& target,
81  const Teuchos::RCP<Teuchos::FancyOStream>& out)
82  : Export(source, target, out, Teuchos::null) {}
83 
84 template <class LocalOrdinal, class GlobalOrdinal, class Node>
86  Export(const Teuchos::RCP<const map_type>& source,
87  const Teuchos::RCP<const map_type>& target,
88  const Teuchos::RCP<Teuchos::ParameterList>& plist)
89  : Export(source, target, Teuchos::null, plist) {}
90 
91 template <class LocalOrdinal, class GlobalOrdinal, class Node>
94  : base_type(rhs) {}
95 
96 template <class LocalOrdinal, class GlobalOrdinal, class Node>
99  : base_type(importer, typename base_type::reverse_tag()) {}
100 
101 template <class LocalOrdinal, class GlobalOrdinal, class Node>
103  describe(Teuchos::FancyOStream& out,
104  const Teuchos::EVerbosityLevel verbLevel) const {
105  // Call the base class' method. It does all the work.
106  this->describeImpl(out, "Tpetra::Export", verbLevel);
107 }
108 
109 template <class LocalOrdinal, class GlobalOrdinal, class Node>
111  print(std::ostream& os) const {
112  auto out = Teuchos::getFancyOStream(Teuchos::rcpFromRef(os));
113  // "Print" traditionally meant "everything."
114  this->describe(*out, Teuchos::VERB_EXTREME);
115 }
116 
117 template <class LocalOrdinal, class GlobalOrdinal, class Node>
119  setupSamePermuteExport(Teuchos::Array<GlobalOrdinal>& exportGIDs) {
120  using std::endl;
121  using Teuchos::arcp;
122  using Teuchos::Array;
123  using Teuchos::ArrayRCP;
124  using Teuchos::ArrayView;
125  using Teuchos::as;
126  using Teuchos::null;
127  using ::Tpetra::Details::makeDualViewFromOwningHostView;
128  using ::Tpetra::Details::ProfilingRegion;
129  using ::Tpetra::Details::view_alloc_no_init;
130  using LO = LocalOrdinal;
131  using GO = GlobalOrdinal;
132  using size_type = typename ArrayView<const GO>::size_type;
133  const char tfecfFuncName[] = "setupSamePermuteExport: ";
134  ProfilingRegion regionExport("Tpetra::Export::setupSamePermuteExport");
135 
136  std::unique_ptr<std::string> prefix;
137  if (this->verbose()) {
138  auto srcMap = this->getSourceMap();
139  auto comm = srcMap.is_null() ? Teuchos::null : srcMap->getComm();
140  const int myRank = comm.is_null() ? -1 : comm->getRank();
141 
142  std::ostringstream os;
143  os << "Proc " << myRank << ": Tpetra::Export::setupSamePermuteExport: ";
144  prefix = std::unique_ptr<std::string>(new std::string(os.str()));
145 
146  std::ostringstream os2;
147  os2 << *prefix << "Start" << std::endl;
148  this->verboseOutputStream() << os2.str();
149  }
150 
151  const map_type& source = *(this->getSourceMap());
152  const map_type& target = *(this->getTargetMap());
153  ArrayView<const GO> sourceGIDs = source.getLocalElementList();
154  ArrayView<const GO> targetGIDs = target.getLocalElementList();
155 
156 #ifdef HAVE_TPETRA_DEBUG
157  ArrayView<const GO> rawSrcGids = sourceGIDs;
158  ArrayView<const GO> rawTgtGids = targetGIDs;
159 #else
160  const GO* const rawSrcGids = sourceGIDs.getRawPtr();
161  const GO* const rawTgtGids = targetGIDs.getRawPtr();
162 #endif // HAVE_TPETRA_DEBUG
163  const size_type numSrcGids = sourceGIDs.size();
164  const size_type numTgtGids = targetGIDs.size();
165  const size_type numGids = std::min(numSrcGids, numTgtGids);
166 
167  // Compute numSameIDs_: the number of initial GIDs that are the
168  // same (and occur in the same order) in both Maps. The point of
169  // numSameIDs_ is for the common case of an Export where all the
170  // overlapping GIDs are at the end of the source Map, but
171  // otherwise the source and target Maps are the same. This allows
172  // a fast contiguous copy for the initial "same IDs."
173  size_type numSameGids = 0;
174  for (; numSameGids < numGids &&
175  rawSrcGids[numSameGids] == rawTgtGids[numSameGids];
176  ++numSameGids) {
177  } // third clause of 'for' does everything
178  this->TransferData_->numSameIDs_ = numSameGids;
179 
180  if (this->verbose()) {
181  std::ostringstream os;
182  os << *prefix << "numIDs: " << numGids
183  << ", numSameIDs: " << numSameGids << endl;
184  this->verboseOutputStream() << os.str();
185  }
186 
187  // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
188  // exportLIDs_. The first two arrays are IDs to be permuted, and
189  // the latter two arrays are IDs to sent out ("exported"), called
190  // "export" IDs.
191  //
192  // IDs to permute are in both the source and target Maps, which
193  // means we don't have to send or receive them, but we do have to
194  // rearrange (permute) them in general. IDs to send are in the
195  // source Map, but not in the target Map.
196 
197  // Iterate over the source Map's LIDs, since we only need to do
198  // GID -> LID lookups for the target Map.
199  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
200  const LO numSrcLids = static_cast<LO>(numSrcGids);
201  LO numPermutes = 0;
202  LO numExports = 0;
203 
204  for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
205  const GO curSrcGid = rawSrcGids[srcLid];
206  // getLocalElement() returns LINVALID if the GID isn't in the
207  // target Map. This saves us a lookup (which
208  // isNodeGlobalElement() would do).
209  const LO tgtLid = target.getLocalElement(curSrcGid);
210  if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
211  ++numPermutes;
212  } else {
213  ++numExports;
214  }
215  }
216  if (this->verbose()) {
217  std::ostringstream os;
218  os << *prefix << "numPermutes: " << numPermutes
219  << ", numExports: " << numExports << endl;
220  this->verboseOutputStream() << os.str();
221  }
222  TEUCHOS_ASSERT(numPermutes + numExports ==
223  numSrcLids - numSameGids);
224 
225  typename decltype(this->TransferData_->permuteToLIDs_)::t_host
226  permuteToLIDs(view_alloc_no_init("permuteToLIDs"), numPermutes);
227  typename decltype(this->TransferData_->permuteToLIDs_)::t_host
228  permuteFromLIDs(view_alloc_no_init("permuteFromLIDs"), numPermutes);
229  typename decltype(this->TransferData_->permuteToLIDs_)::t_host
230  exportLIDs(view_alloc_no_init("exportLIDs"), numExports);
231 
232  // FIXME (mfh 03 Feb 2019) Replace with std::unique_ptr of array,
233  // to avoid superfluous initialization on resize.
234  exportGIDs.resize(numExports);
235 
236  {
237  LO numPermutes2 = 0;
238  LO numExports2 = 0;
239  for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
240  const GO curSrcGid = rawSrcGids[srcLid];
241  const LO tgtLid = target.getLocalElement(curSrcGid);
242  if (tgtLid != LINVALID) {
243  permuteToLIDs[numPermutes2] = tgtLid;
244  permuteFromLIDs[numPermutes2] = srcLid;
245  ++numPermutes2;
246  } else {
247  exportGIDs[numExports2] = curSrcGid;
248  exportLIDs[numExports2] = srcLid;
249  ++numExports2;
250  }
251  }
252  TEUCHOS_ASSERT(numPermutes == numPermutes2);
253  TEUCHOS_ASSERT(numExports == numExports2);
254  TEUCHOS_ASSERT(size_t(numExports) == size_t(exportGIDs.size()));
255  }
256 
257  // Defer making this->TransferData_->exportLIDs_ until after
258  // getRemoteIndexList, since we might need to shrink it then.
259 
260  // exportLIDs is the list of this process' LIDs that it has to
261  // send out. Since this is an Export, and therefore the target
262  // Map is nonoverlapping, we know that each export LID only needs
263  // to be sent to one process. However, the source Map may be
264  // overlapping, so multiple processes might send to the same LID
265  // on a receiving process.
266 
267  if (numExports != 0 && !source.isDistributed()) {
268  // This Export has export LIDs, meaning that the source Map has
269  // entries on this process that are not in the target Map on
270  // this process. However, the source Map is not distributed
271  // globally. This implies that this Import is not locally
272  // complete on this process.
273  this->TransferData_->isLocallyComplete_ = false;
274  if (this->verbose()) {
275  std::ostringstream os;
276  os << *prefix << "Export is not locally complete" << endl;
277  this->verboseOutputStream() << os.str();
278  }
279  // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
280  // correct behavior, depending on the circumstances.
281  TPETRA_ABUSE_WARNING(true, std::runtime_error,
282  "::setupSamePermuteExport(): Source has "
283  "export LIDs but Source is not distributed globally. Exporting to "
284  "a submap of the target map.");
285  }
286 
287  // Compute exportPIDs_ ("outgoing" process IDs).
288  //
289  // For each GID in exportGIDs (GIDs to which this process must
290  // send), find its corresponding owning process (a.k.a. "image")
291  // ID in the target Map. Store these process IDs in
292  // exportPIDs_. These are the process IDs to which the Export
293  // needs to send data.
294  //
295  // We only need to do this if the source Map is distributed;
296  // otherwise, the Export doesn't have to perform any
297  // communication.
298  if (source.isDistributed()) {
299  if (this->verbose()) {
300  std::ostringstream os;
301  os << *prefix << "Source Map is distributed; "
302  "call targetMap.getRemoteiNdexList"
303  << endl;
304  this->verboseOutputStream() << os.str();
305  }
306  this->TransferData_->exportPIDs_.resize(exportGIDs.size());
307  // This call will assign any GID in the target Map with no
308  // corresponding process ID a fake process ID of -1. We'll use
309  // this below to remove exports for processses that don't exist.
310  const LookupStatus lookup =
311  target.getRemoteIndexList(exportGIDs(),
312  this->TransferData_->exportPIDs_());
313  // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
314  // correct behavior, depending on the circumstances.
315  TPETRA_ABUSE_WARNING(lookup == IDNotPresent, std::runtime_error,
316  "::setupSamePermuteExport(): The source Map has GIDs not found "
317  "in the target Map.");
318 
319  // Get rid of process IDs not in the target Map. This prevents
320  // exporting to GIDs which don't belong to any process in the
321  // target Map.
322  if (lookup == IDNotPresent) {
323  // There is at least one GID owned by the calling process in
324  // the source Map, which is not owned by any process in the
325  // target Map.
326  this->TransferData_->isLocallyComplete_ = false;
327 
328  Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
329 
330  const size_type totalNumExports = exportPIDs.size();
331  const size_type numInvalidExports =
332  std::count_if(exportPIDs.begin(), exportPIDs.end(),
333  [](const int procId) { return procId == -1; });
334  if (this->verbose()) {
335  std::ostringstream os;
336  os << *prefix << "totalNumExports: " << totalNumExports
337  << ", numInvalidExports: " << numInvalidExports << endl;
338  this->verboseOutputStream() << os.str();
339  }
340  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInvalidExports == 0, std::logic_error,
341  "targetMap.getRemoteIndexList returned IDNotPresent, but no export "
342  "PIDs are -1. Please report this bug to the Tpetra developers.");
343 
344  // We know that at least one export ID is invalid, that is,
345  // not in any process on the target Map. If all export IDs
346  // are invalid, we can delete all exports. Otherwise, keep
347  // the valid exports and discard the rest. This is legit
348  // Petra Object Model behavior, but it's a less common case.
349 
350  if (numInvalidExports == totalNumExports) {
351  exportGIDs.resize(0);
352  exportLIDs = decltype(exportLIDs)();
353  exportPIDs.resize(0);
354  } else {
355  size_type numValidExports = 0;
356  for (size_type e = 0; e < totalNumExports; ++e) {
357  if (this->TransferData_->exportPIDs_[e] != -1) {
358  exportGIDs[numValidExports] = exportGIDs[e];
359  exportLIDs[numValidExports] = exportLIDs[e];
360  exportPIDs[numValidExports] = exportPIDs[e];
361  ++numValidExports;
362  }
363  }
364  exportGIDs.resize(numValidExports);
365  Kokkos::resize(exportLIDs, numValidExports);
366  exportPIDs.resize(numValidExports);
367  }
368  }
369  }
370 
371  // FIXME (mfh 03 Feb 2019) These three DualViews could share a
372  // single device allocation, in order to avoid high cudaMalloc
373  // cost and device memory fragmentation.
374  makeDualViewFromOwningHostView(this->TransferData_->permuteToLIDs_, permuteToLIDs);
375  makeDualViewFromOwningHostView(this->TransferData_->permuteFromLIDs_, permuteFromLIDs);
376  makeDualViewFromOwningHostView(this->TransferData_->exportLIDs_, exportLIDs);
377 
378  if (this->verbose()) {
379  std::ostringstream os;
380  os << *prefix << "Done!" << std::endl;
381  this->verboseOutputStream() << os.str();
382  }
383 }
384 
385 template <class LocalOrdinal, class GlobalOrdinal, class Node>
386 void Export<LocalOrdinal, GlobalOrdinal, Node>::
387  setupRemote(Teuchos::Array<GlobalOrdinal>& exportGIDs) {
388  using std::endl;
389  using Teuchos::Array;
390  using ::Tpetra::Details::makeDualViewFromOwningHostView;
391  using ::Tpetra::Details::view_alloc_no_init;
392  using LO = LocalOrdinal;
393  using GO = GlobalOrdinal;
394 
395  std::unique_ptr<std::string> prefix;
396  if (this->verbose()) {
397  auto srcMap = this->getSourceMap();
398  auto comm = srcMap.is_null() ? Teuchos::null : srcMap->getComm();
399  const int myRank = comm.is_null() ? -1 : comm->getRank();
400 
401  std::ostringstream os;
402  os << "Proc " << myRank << ": Tpetra::Export::setupRemote: ";
403  prefix = std::unique_ptr<std::string>(new std::string(os.str()));
404 
405  std::ostringstream os2;
406  os2 << *prefix << "Start" << std::endl;
407  this->verboseOutputStream() << os2.str();
408  }
409 
410  TEUCHOS_ASSERT(!this->getTargetMap().is_null());
411  const map_type& tgtMap = *(this->getTargetMap());
412 
413  // Sort exportPIDs_ in ascending order, and apply the same
414  // permutation to exportGIDs_ and exportLIDs_. This ensures that
415  // exportPIDs_[i], exportGIDs_[i], and exportLIDs_[i] all
416  // refer to the same thing.
417  {
418  TEUCHOS_ASSERT(size_t(this->TransferData_->exportLIDs_.extent(0)) ==
419  size_t(this->TransferData_->exportPIDs_.size()));
420  this->TransferData_->exportLIDs_.modify_host();
421  auto exportLIDs = this->TransferData_->exportLIDs_.view_host();
422  sort3(this->TransferData_->exportPIDs_.begin(),
423  this->TransferData_->exportPIDs_.end(),
424  exportGIDs.getRawPtr(),
425  exportLIDs.data());
426  this->TransferData_->exportLIDs_.sync_device();
427  // FIXME (mfh 03 Feb 2019) We actually end up sync'ing
428  // exportLIDs_ to device twice, once in setupSamePermuteExport,
429  // and once here. We could avoid the first sync.
430  }
431 
432  if (this->verbose()) {
433  std::ostringstream os;
434  os << *prefix << "Call createFromSends" << endl;
435  this->verboseOutputStream() << os.str();
436  }
437 
438  // Construct the list of entries that calling image needs to send
439  // as a result of everyone asking for what it needs to receive.
440  //
441  // mfh 05 Jan 2012: I understand the above comment as follows:
442  // Construct the communication plan from the list of image IDs to
443  // which we need to send.
444  Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
445  Distributor& distributor = this->TransferData_->distributor_;
446  const size_t numRemoteIDs = distributor.createFromSends(exportPIDs());
447 
448  if (this->verbose()) {
449  std::ostringstream os;
450  os << *prefix << "numRemoteIDs: " << numRemoteIDs
451  << "; call doPostsAndWaits" << endl;
452  this->verboseOutputStream() << os.str();
453  }
454 
455  // Use the communication plan with ExportGIDs to find out who is
456  // sending to us and get the proper ordering of GIDs for incoming
457  // remote entries (these will be converted to LIDs when done).
458 
459  Kokkos::View<const GO*, Kokkos::HostSpace> exportGIDsConst(exportGIDs.data(), exportGIDs.size());
460  Kokkos::View<GO*, Kokkos::HostSpace> remoteGIDs("remoteGIDs", numRemoteIDs);
461  distributor.doPostsAndWaits(exportGIDsConst, 1, remoteGIDs);
462 
463  // Remote (incoming) IDs come in as GIDs; convert to LIDs. LIDs
464  // tell this process where to store the incoming remote data.
465  using host_remote_lids_type =
466  typename decltype(this->TransferData_->remoteLIDs_)::t_host;
467  host_remote_lids_type remoteLIDs(view_alloc_no_init("remoteLIDs"), numRemoteIDs);
468 
469  for (LO j = 0; j < LO(numRemoteIDs); ++j) {
470  remoteLIDs[j] = tgtMap.getLocalElement(remoteGIDs[j]);
471  }
472  makeDualViewFromOwningHostView(this->TransferData_->remoteLIDs_, remoteLIDs);
473 
474  if (this->verbose()) {
475  std::ostringstream os;
476  os << *prefix << "Done!" << endl;
477  this->verboseOutputStream() << os.str();
478  }
479 }
480 
481 } // namespace Tpetra
482 
// Explicit instantiation macro for Export.
//
// Expand this only inside the Tpetra namespace.  Most users do not need
// to use it.
//
//   LO:   the local ordinal type
//   GO:   the global ordinal type
//   NODE: the Kokkos Node type
#define TPETRA_EXPORT_INSTANT(LO, GO, NODE) template class Export<LO, GO, NODE>;
492 
493 #endif // TPETRA_EXPORT_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Teuchos::FancyOStream & verboseOutputStream() const
Valid (nonnull) output stream for verbose output.
virtual void print(std::ostream &os) const
Print the Export's data to the given output stream.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Export(const Teuchos::RCP< const map_type > &source, const Teuchos::RCP< const map_type > &target)
Construct an Export object from the source and target Map.
auto view_alloc_no_init(const std::string &label) -> decltype(Kokkos::view_alloc(label, Kokkos::WithoutInitializing))
Use in place of the string label as the first argument of Kokkos::View's constructor, in case you want to allocate without initializing.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WARNINGS.
Teuchos::RCP< ImportExportData< LocalOrdinal, GlobalOrdinal, Node > > TransferData_
All the data needed for executing the Export communication plan.
Stand-alone utility functions and macros.
void makeDualViewFromOwningHostView(Kokkos::DualView< ElementType *, DeviceType > &dv, const typename Kokkos::DualView< ElementType *, DeviceType >::t_host &hostView)
Initialize dv such that its host View is hostView.
bool verbose() const
Whether to print verbose debugging output.