Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Export_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_EXPORT_DEF_HPP
11 #define TPETRA_EXPORT_DEF_HPP
12 
13 
14 #include "Tpetra_Distributor.hpp"
15 #include "Tpetra_Map.hpp"
16 #include "Tpetra_ImportExportData.hpp"
17 #include "Tpetra_Util.hpp"
18 #include "Tpetra_Import.hpp"
19 #include "Tpetra_Details_DualViewUtil.hpp"
21 #include "Teuchos_as.hpp"
22 #include "Teuchos_Array.hpp"
23 #include "Teuchos_FancyOStream.hpp"
24 #include "Teuchos_ParameterList.hpp"
25 #include <memory>
26 
27 namespace Tpetra {
28 
29  template <class LocalOrdinal, class GlobalOrdinal, class Node>
31  Export (const Teuchos::RCP<const map_type >& source,
32  const Teuchos::RCP<const map_type >& target,
33  const Teuchos::RCP<Teuchos::FancyOStream>& out,
34  const Teuchos::RCP<Teuchos::ParameterList>& plist) :
35  base_type (source, target, out, plist, "Export")
36  {
37  using Teuchos::rcp;
38  using std::endl;
39  using ::Tpetra::Details::ProfilingRegion;
40  ProfilingRegion regionExport ("Tpetra::Export::Export");
41 
42  if (this->verbose ()) {
43  std::ostringstream os;
44  const int myRank = source->getComm ()->getRank ();
45  os << myRank << ": Export ctor" << endl;
46  this->verboseOutputStream () << os.str ();
47  }
48  Teuchos::Array<GlobalOrdinal> exportGIDs;
49  setupSamePermuteExport (exportGIDs);
50  if (source->isDistributed ()) {
51  setupRemote (exportGIDs);
52  }
53 
54  TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_device () );
55  TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_host () );
56  TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_device () );
57  TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_host () );
58  TEUCHOS_ASSERT( ! this->TransferData_->remoteLIDs_.need_sync_device () );
59  TEUCHOS_ASSERT( ! this->TransferData_->remoteLIDs_.need_sync_host () );
60  TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_device () );
61  TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_host () );
62 
63  this->detectRemoteExportLIDsContiguous();
64 
65  if (this->verbose ()) {
66  std::ostringstream os;
67  const int myRank = source->getComm ()->getRank ();
68  os << myRank << ": Export ctor: done" << endl;
69  this->verboseOutputStream () << os.str ();
70  }
71  }
72 
73  template <class LocalOrdinal, class GlobalOrdinal, class Node>
75  Export (const Teuchos::RCP<const map_type>& source,
76  const Teuchos::RCP<const map_type>& target) :
77  Export (source, target, Teuchos::null, Teuchos::null)
78  {}
79 
80  template <class LocalOrdinal, class GlobalOrdinal, class Node>
82  Export (const Teuchos::RCP<const map_type >& source,
83  const Teuchos::RCP<const map_type >& target,
84  const Teuchos::RCP<Teuchos::FancyOStream>& out) :
85  Export (source, target, out, Teuchos::null)
86  {}
87 
88  template <class LocalOrdinal, class GlobalOrdinal, class Node>
90  Export (const Teuchos::RCP<const map_type >& source,
91  const Teuchos::RCP<const map_type >& target,
92  const Teuchos::RCP<Teuchos::ParameterList>& plist) :
93  Export (source, target, Teuchos::null, plist)
94  {}
95 
// Constructor definition whose initializer list forwards `rhs` to the
// base_type constructor; the constructor body itself is empty.
// NOTE(review): the class-qualifier and declarator lines are absent from
// this listing (the embedded numbering jumps from 96 to 99).  Presumably
// this is the copy constructor taking `rhs` -- confirm against the
// upstream header before relying on this fragment.
96  template <class LocalOrdinal, class GlobalOrdinal, class Node>
99  base_type (rhs)
100  {}
101 
// Constructor definition whose initializer list forwards `importer`,
// together with a default-constructed base_type::reverse_tag, to the
// base_type constructor; the constructor body itself is empty.
// NOTE(review): the class-qualifier and declarator lines are absent from
// this listing (the embedded numbering jumps from 102 to 105).
// Presumably this builds an Export as the reverse of an existing Import
// -- confirm the parameter type against the upstream header.
102  template <class LocalOrdinal, class GlobalOrdinal, class Node>
105  base_type (importer, typename base_type::reverse_tag ())
106  {}
107 
108  template <class LocalOrdinal, class GlobalOrdinal, class Node>
109  void
111  describe (Teuchos::FancyOStream& out,
112  const Teuchos::EVerbosityLevel verbLevel) const
113  {
114  // Call the base class' method. It does all the work.
115  this->describeImpl (out, "Tpetra::Export", verbLevel);
116  }
117 
118  template <class LocalOrdinal, class GlobalOrdinal, class Node>
120  print (std::ostream& os) const
121  {
122  auto out = Teuchos::getFancyOStream (Teuchos::rcpFromRef (os));
123  // "Print" traditionally meant "everything."
124  this->describe (*out, Teuchos::VERB_EXTREME);
125  }
126 
127  template <class LocalOrdinal, class GlobalOrdinal, class Node>
128  void
130  setupSamePermuteExport (Teuchos::Array<GlobalOrdinal>& exportGIDs)
131  {
132  using ::Tpetra::Details::makeDualViewFromOwningHostView;
133  using ::Tpetra::Details::ProfilingRegion;
134  using ::Tpetra::Details::view_alloc_no_init;
135  using Teuchos::arcp;
136  using Teuchos::Array;
137  using Teuchos::ArrayRCP;
138  using Teuchos::ArrayView;
139  using Teuchos::as;
140  using Teuchos::null;
141  using std::endl;
142  using LO = LocalOrdinal;
143  using GO = GlobalOrdinal;
144  using size_type = typename ArrayView<const GO>::size_type;
145  const char tfecfFuncName[] = "setupSamePermuteExport: ";
146  ProfilingRegion regionExport ("Tpetra::Export::setupSamePermuteExport");
147 
148  std::unique_ptr<std::string> prefix;
149  if (this->verbose ()) {
150  auto srcMap = this->getSourceMap ();
151  auto comm = srcMap.is_null () ? Teuchos::null : srcMap->getComm ();
152  const int myRank = comm.is_null () ? -1 : comm->getRank ();
153 
154  std::ostringstream os;
155  os << "Proc " << myRank << ": Tpetra::Export::setupSamePermuteExport: ";
156  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
157 
158  std::ostringstream os2;
159  os2 << *prefix << "Start" << std::endl;
160  this->verboseOutputStream () << os2.str ();
161  }
162 
163  const map_type& source = * (this->getSourceMap ());
164  const map_type& target = * (this->getTargetMap ());
165  ArrayView<const GO> sourceGIDs = source.getLocalElementList ();
166  ArrayView<const GO> targetGIDs = target.getLocalElementList ();
167 
168 #ifdef HAVE_TPETRA_DEBUG
169  ArrayView<const GO> rawSrcGids = sourceGIDs;
170  ArrayView<const GO> rawTgtGids = targetGIDs;
171 #else
172  const GO* const rawSrcGids = sourceGIDs.getRawPtr ();
173  const GO* const rawTgtGids = targetGIDs.getRawPtr ();
174 #endif // HAVE_TPETRA_DEBUG
175  const size_type numSrcGids = sourceGIDs.size ();
176  const size_type numTgtGids = targetGIDs.size ();
177  const size_type numGids = std::min (numSrcGids, numTgtGids);
178 
179  // Compute numSameIDs_: the number of initial GIDs that are the
180  // same (and occur in the same order) in both Maps. The point of
181  // numSameIDs_ is for the common case of an Export where all the
182  // overlapping GIDs are at the end of the source Map, but
183  // otherwise the source and target Maps are the same. This allows
184  // a fast contiguous copy for the initial "same IDs."
185  size_type numSameGids = 0;
186  for ( ; numSameGids < numGids &&
187  rawSrcGids[numSameGids] == rawTgtGids[numSameGids];
188  ++numSameGids)
189  {} // third clause of 'for' does everything
190  this->TransferData_->numSameIDs_ = numSameGids;
191 
192  if (this->verbose ()) {
193  std::ostringstream os;
194  os << *prefix << "numIDs: " << numGids
195  << ", numSameIDs: " << numSameGids << endl;
196  this->verboseOutputStream () << os.str ();
197  }
198 
199  // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
200  // exportLIDs_. The first two arrays are IDs to be permuted, and
201  // the latter two arrays are IDs to sent out ("exported"), called
202  // "export" IDs.
203  //
204  // IDs to permute are in both the source and target Maps, which
205  // means we don't have to send or receive them, but we do have to
206  // rearrange (permute) them in general. IDs to send are in the
207  // source Map, but not in the target Map.
208 
209  // Iterate over the source Map's LIDs, since we only need to do
210  // GID -> LID lookups for the target Map.
211  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
212  const LO numSrcLids = static_cast<LO> (numSrcGids);
213  LO numPermutes = 0;
214  LO numExports = 0;
215 
216  for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
217  const GO curSrcGid = rawSrcGids[srcLid];
218  // getLocalElement() returns LINVALID if the GID isn't in the
219  // target Map. This saves us a lookup (which
220  // isNodeGlobalElement() would do).
221  const LO tgtLid = target.getLocalElement (curSrcGid);
222  if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
223  ++numPermutes;
224  }
225  else {
226  ++numExports;
227  }
228  }
229  if (this->verbose ()) {
230  std::ostringstream os;
231  os << *prefix << "numPermutes: " << numPermutes
232  << ", numExports: " << numExports << endl;
233  this->verboseOutputStream () << os.str ();
234  }
235  TEUCHOS_ASSERT( numPermutes + numExports ==
236  numSrcLids - numSameGids );
237 
238  typename decltype (this->TransferData_->permuteToLIDs_)::t_host
239  permuteToLIDs (view_alloc_no_init ("permuteToLIDs"), numPermutes);
240  typename decltype (this->TransferData_->permuteToLIDs_)::t_host
241  permuteFromLIDs (view_alloc_no_init ("permuteFromLIDs"), numPermutes);
242  typename decltype (this->TransferData_->permuteToLIDs_)::t_host
243  exportLIDs (view_alloc_no_init ("exportLIDs"), numExports);
244 
245  // FIXME (mfh 03 Feb 2019) Replace with std::unique_ptr of array,
246  // to avoid superfluous initialization on resize.
247  exportGIDs.resize (numExports);
248 
249  {
250  LO numPermutes2 = 0;
251  LO numExports2 = 0;
252  for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
253  const GO curSrcGid = rawSrcGids[srcLid];
254  const LO tgtLid = target.getLocalElement (curSrcGid);
255  if (tgtLid != LINVALID) {
256  permuteToLIDs[numPermutes2] = tgtLid;
257  permuteFromLIDs[numPermutes2] = srcLid;
258  ++numPermutes2;
259  }
260  else {
261  exportGIDs[numExports2] = curSrcGid;
262  exportLIDs[numExports2] = srcLid;
263  ++numExports2;
264  }
265  }
266  TEUCHOS_ASSERT( numPermutes == numPermutes2 );
267  TEUCHOS_ASSERT( numExports == numExports2 );
268  TEUCHOS_ASSERT( size_t (numExports) == size_t (exportGIDs.size ()) );
269  }
270 
271  // Defer making this->TransferData_->exportLIDs_ until after
272  // getRemoteIndexList, since we might need to shrink it then.
273 
274  // exportLIDs is the list of this process' LIDs that it has to
275  // send out. Since this is an Export, and therefore the target
276  // Map is nonoverlapping, we know that each export LID only needs
277  // to be sent to one process. However, the source Map may be
278  // overlapping, so multiple processes might send to the same LID
279  // on a receiving process.
280 
281  if (numExports != 0 && ! source.isDistributed ()) {
282  // This Export has export LIDs, meaning that the source Map has
283  // entries on this process that are not in the target Map on
284  // this process. However, the source Map is not distributed
285  // globally. This implies that this Import is not locally
286  // complete on this process.
287  this->TransferData_->isLocallyComplete_ = false;
288  if (this->verbose ()) {
289  std::ostringstream os;
290  os << *prefix << "Export is not locally complete" << endl;
291  this->verboseOutputStream () << os.str ();
292  }
293  // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
294  // correct behavior, depending on the circumstances.
296  (true, std::runtime_error, "::setupSamePermuteExport(): Source has "
297  "export LIDs but Source is not distributed globally. Exporting to "
298  "a submap of the target map.");
299  }
300 
301  // Compute exportPIDs_ ("outgoing" process IDs).
302  //
303  // For each GID in exportGIDs (GIDs to which this process must
304  // send), find its corresponding owning process (a.k.a. "image")
305  // ID in the target Map. Store these process IDs in
306  // exportPIDs_. These are the process IDs to which the Export
307  // needs to send data.
308  //
309  // We only need to do this if the source Map is distributed;
310  // otherwise, the Export doesn't have to perform any
311  // communication.
312  if (source.isDistributed ()) {
313  if (this->verbose ()) {
314  std::ostringstream os;
315  os << *prefix << "Source Map is distributed; "
316  "call targetMap.getRemoteiNdexList" << endl;
317  this->verboseOutputStream () << os.str ();
318  }
319  this->TransferData_->exportPIDs_.resize(exportGIDs.size ());
320  // This call will assign any GID in the target Map with no
321  // corresponding process ID a fake process ID of -1. We'll use
322  // this below to remove exports for processses that don't exist.
323  const LookupStatus lookup =
324  target.getRemoteIndexList (exportGIDs(),
325  this->TransferData_->exportPIDs_ ());
326  // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
327  // correct behavior, depending on the circumstances.
328  TPETRA_ABUSE_WARNING( lookup == IDNotPresent, std::runtime_error,
329  "::setupSamePermuteExport(): The source Map has GIDs not found "
330  "in the target Map.");
331 
332  // Get rid of process IDs not in the target Map. This prevents
333  // exporting to GIDs which don't belong to any process in the
334  // target Map.
335  if (lookup == IDNotPresent) {
336  // There is at least one GID owned by the calling process in
337  // the source Map, which is not owned by any process in the
338  // target Map.
339  this->TransferData_->isLocallyComplete_ = false;
340 
341  Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
342 
343  const size_type totalNumExports = exportPIDs.size ();
344  const size_type numInvalidExports =
345  std::count_if (exportPIDs.begin (), exportPIDs.end (),
346  [] (const int procId) { return procId == -1; });
347  if (this->verbose ()) {
348  std::ostringstream os;
349  os << *prefix << "totalNumExports: " << totalNumExports
350  << ", numInvalidExports: " << numInvalidExports << endl;
351  this->verboseOutputStream () << os.str ();
352  }
353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354  (numInvalidExports == 0, std::logic_error,
355  "targetMap.getRemoteIndexList returned IDNotPresent, but no export "
356  "PIDs are -1. Please report this bug to the Tpetra developers.");
357 
358  // We know that at least one export ID is invalid, that is,
359  // not in any process on the target Map. If all export IDs
360  // are invalid, we can delete all exports. Otherwise, keep
361  // the valid exports and discard the rest. This is legit
362  // Petra Object Model behavior, but it's a less common case.
363 
364  if (numInvalidExports == totalNumExports) {
365  exportGIDs.resize (0);
366  exportLIDs = decltype (exportLIDs) ();
367  exportPIDs.resize (0);
368  }
369  else {
370  size_type numValidExports = 0;
371  for (size_type e = 0; e < totalNumExports; ++e) {
372  if (this->TransferData_->exportPIDs_[e] != -1) {
373  exportGIDs[numValidExports] = exportGIDs[e];
374  exportLIDs[numValidExports] = exportLIDs[e];
375  exportPIDs[numValidExports] = exportPIDs[e];
376  ++numValidExports;
377  }
378  }
379  exportGIDs.resize (numValidExports);
380  Kokkos::resize (exportLIDs, numValidExports);
381  exportPIDs.resize (numValidExports);
382  }
383  }
384  }
385 
386  // FIXME (mfh 03 Feb 2019) These three DualViews could share a
387  // single device allocation, in order to avoid high cudaMalloc
388  // cost and device memory fragmentation.
389  makeDualViewFromOwningHostView (this->TransferData_->permuteToLIDs_, permuteToLIDs);
390  makeDualViewFromOwningHostView (this->TransferData_->permuteFromLIDs_, permuteFromLIDs);
391  makeDualViewFromOwningHostView (this->TransferData_->exportLIDs_, exportLIDs);
392 
393  if (this->verbose ()) {
394  std::ostringstream os;
395  os << *prefix << "Done!" << std::endl;
396  this->verboseOutputStream () << os.str ();
397  }
398  }
399 
400  template <class LocalOrdinal, class GlobalOrdinal, class Node>
401  void
402  Export<LocalOrdinal,GlobalOrdinal,Node>::
403  setupRemote (Teuchos::Array<GlobalOrdinal>& exportGIDs)
404  {
405  using ::Tpetra::Details::view_alloc_no_init;
406  using ::Tpetra::Details::makeDualViewFromOwningHostView;
407  using Teuchos::Array;
408  using std::endl;
409  using LO = LocalOrdinal;
410  using GO = GlobalOrdinal;
411 
412  std::unique_ptr<std::string> prefix;
413  if (this->verbose ()) {
414  auto srcMap = this->getSourceMap ();
415  auto comm = srcMap.is_null () ? Teuchos::null : srcMap->getComm ();
416  const int myRank = comm.is_null () ? -1 : comm->getRank ();
417 
418  std::ostringstream os;
419  os << "Proc " << myRank << ": Tpetra::Export::setupRemote: ";
420  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
421 
422  std::ostringstream os2;
423  os2 << *prefix << "Start" << std::endl;
424  this->verboseOutputStream () << os2.str ();
425  }
426 
427  TEUCHOS_ASSERT( ! this->getTargetMap ().is_null () );
428  const map_type& tgtMap = * (this->getTargetMap ());
429 
430  // Sort exportPIDs_ in ascending order, and apply the same
431  // permutation to exportGIDs_ and exportLIDs_. This ensures that
432  // exportPIDs_[i], exportGIDs_[i], and exportLIDs_[i] all
433  // refer to the same thing.
434  {
435  TEUCHOS_ASSERT( size_t (this->TransferData_->exportLIDs_.extent (0)) ==
436  size_t (this->TransferData_->exportPIDs_.size ()) );
437  this->TransferData_->exportLIDs_.modify_host ();
438  auto exportLIDs = this->TransferData_->exportLIDs_.view_host ();
439  sort3 (this->TransferData_->exportPIDs_.begin (),
440  this->TransferData_->exportPIDs_.end (),
441  exportGIDs.getRawPtr (),
442  exportLIDs.data ());
443  this->TransferData_->exportLIDs_.sync_device ();
444  // FIXME (mfh 03 Feb 2019) We actually end up sync'ing
445  // exportLIDs_ to device twice, once in setupSamePermuteExport,
446  // and once here. We could avoid the first sync.
447  }
448 
449  if (this->verbose ()) {
450  std::ostringstream os;
451  os << *prefix << "Call createFromSends" << endl;
452  this->verboseOutputStream () << os.str ();
453  }
454 
455  // Construct the list of entries that calling image needs to send
456  // as a result of everyone asking for what it needs to receive.
457  //
458  // mfh 05 Jan 2012: I understand the above comment as follows:
459  // Construct the communication plan from the list of image IDs to
460  // which we need to send.
461  Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
462  Distributor& distributor = this->TransferData_->distributor_;
463  const size_t numRemoteIDs = distributor.createFromSends (exportPIDs ());
464 
465  if (this->verbose ()) {
466  std::ostringstream os;
467  os << *prefix << "numRemoteIDs: " << numRemoteIDs
468  << "; call doPostsAndWaits" << endl;
469  this->verboseOutputStream () << os.str ();
470  }
471 
472  // Use the communication plan with ExportGIDs to find out who is
473  // sending to us and get the proper ordering of GIDs for incoming
474  // remote entries (these will be converted to LIDs when done).
475 
476  Kokkos::View<const GO*, Kokkos::HostSpace> exportGIDsConst(exportGIDs.data(), exportGIDs.size());
477  Kokkos::View<GO*, Kokkos::HostSpace> remoteGIDs("remoteGIDs", numRemoteIDs);
478  distributor.doPostsAndWaits(exportGIDsConst, 1, remoteGIDs);
479 
480  // Remote (incoming) IDs come in as GIDs; convert to LIDs. LIDs
481  // tell this process where to store the incoming remote data.
482  using host_remote_lids_type =
483  typename decltype (this->TransferData_->remoteLIDs_)::t_host;
484  host_remote_lids_type remoteLIDs
485  (view_alloc_no_init ("remoteLIDs"), numRemoteIDs);
486 
487  for (LO j = 0; j < LO (numRemoteIDs); ++j) {
488  remoteLIDs[j] = tgtMap.getLocalElement (remoteGIDs[j]);
489  }
490  makeDualViewFromOwningHostView (this->TransferData_->remoteLIDs_, remoteLIDs);
491 
492  if (this->verbose ()) {
493  std::ostringstream os;
494  os << *prefix << "Done!" << endl;
495  this->verboseOutputStream () << os.str ();
496  }
497  }
498 
499 } // namespace Tpetra
500 
// Explicit-instantiation helper for the Export class template.
//
// Expands to an explicit instantiation of Export for one combination of
// template arguments.  Export is named without qualification, so only
// expand this macro inside the Tpetra namespace.  Most users never need
// it.
//
//   LO   - the local ordinal type
//   GO   - the global ordinal type
//   NODE - the Kokkos Node type
#define TPETRA_EXPORT_INSTANT(LO, GO, NODE) \
  template class Export<LO, GO, NODE>;
510 
511 #endif // TPETRA_EXPORT_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::FancyOStream & verboseOutputStream() const
Valid (nonnull) output stream for verbose output.
virtual void print(std::ostream &os) const
Print the Export's data to the given output stream.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Export(const Teuchos::RCP< const map_type > &source, const Teuchos::RCP< const map_type > &target)
Construct a Export object from the source and target Map.
auto view_alloc_no_init(const std::string &label) ->
Use in place of the string label as the first argument of Kokkos::View's constructor, in case you want to allocate without initializing.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
Teuchos::RCP< ImportExportData< LocalOrdinal, GlobalOrdinal, Node > > TransferData_
All the data needed for executing the Export communication plan.
Stand-alone utility functions and macros.
void makeDualViewFromOwningHostView(Kokkos::DualView< ElementType *, DeviceType > &dv, const typename Kokkos::DualView< ElementType *, DeviceType >::t_host &hostView)
Initialize dv such that its host View is hostView.
bool verbose() const
Whether to print verbose debugging output.