Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
11 #define TPETRA_DIRECTORYIMPL_DEF_HPP
12 
15 
16 #include "Tpetra_Distributor.hpp"
17 #include "Tpetra_Map.hpp"
18 #include "Tpetra_TieBreak.hpp"
19 #include "Tpetra_Util.hpp"
20 #include "Tpetra_Details_FixedHashTable.hpp"
21 #include "Teuchos_Comm.hpp"
22 #include <memory>
23 #include <sstream>
24 
25 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
26 #ifdef HAVE_TPETRACORE_MPI
27 #include <mpi.h>
28 #endif // HAVE_TPETRACORE_MPI
29 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
30 
31 namespace Tpetra {
32 namespace Details {
33 template <class LO, class GO, class NT>
36  getEntries(const map_type& map,
37  const Teuchos::ArrayView<const GO>& globalIDs,
38  const Teuchos::ArrayView<int>& nodeIDs,
39  const Teuchos::ArrayView<LO>& localIDs,
40  const bool computeLIDs) const {
41  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
42  // all have the same size, before modifying any output arguments.
43  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
44  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
45  "Output arrays do not have the right sizes. nodeIDs.size() = "
46  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size() << ".");
47  TEUCHOS_TEST_FOR_EXCEPTION(
48  computeLIDs && localIDs.size() != globalIDs.size(),
49  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
50  "Output array do not have the right sizes. localIDs.size() = "
51  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size() << ".");
52 
53  // Initially, fill nodeIDs and localIDs (if applicable) with
54  // invalid values. The "invalid" process ID is -1 (this means
55  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
56  // "invalid" process ID); the invalid local ID comes from
57  // OrdinalTraits.
58  std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
59  if (computeLIDs) {
60  std::fill(localIDs.begin(), localIDs.end(),
61  Teuchos::OrdinalTraits<LO>::invalid());
62  }
63  // Actually do the work.
64  return this->getEntriesImpl(map, globalIDs, nodeIDs, localIDs, computeLIDs);
65 }
66 
67 template <class LO, class GO, class NT>
70  : numProcs_(map.getComm()->getSize()) {}
71 
72 template <class LO, class GO, class NT>
74  isOneToOne(const Teuchos::Comm<int>& /* comm */) const {
75  // A locally replicated Map is one-to-one only if there is no
76  // replication, that is, only if the Map's communicator only has
77  // one process.
78  return (numProcs_ == 1);
79 }
80 
81 template <class LO, class GO, class NT>
82 std::string
84  std::ostringstream os;
85  os << "ReplicatedDirectory"
86  << "<" << Teuchos::TypeNameTraits<LO>::name()
87  << ", " << Teuchos::TypeNameTraits<GO>::name()
88  << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
89  return os.str();
90 }
91 
92 template <class LO, class GO, class NT>
94  ContiguousUniformDirectory(const map_type& map) {
95  TEUCHOS_TEST_FOR_EXCEPTION(!map.isContiguous(), std::invalid_argument,
96  Teuchos::typeName(*this) << " constructor: Map is not contiguous.");
97  TEUCHOS_TEST_FOR_EXCEPTION(!map.isUniform(), std::invalid_argument,
98  Teuchos::typeName(*this) << " constructor: Map is not uniform.");
99 }
100 
101 template <class LO, class GO, class NT>
102 std::string
104  std::ostringstream os;
105  os << "ContiguousUniformDirectory"
106  << "<" << Teuchos::TypeNameTraits<LO>::name()
107  << ", " << Teuchos::TypeNameTraits<GO>::name()
108  << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
109  return os.str();
110 }
111 
112 template <class LO, class GO, class NT>
116  const Teuchos::ArrayView<const GO>& globalIDs,
117  const Teuchos::ArrayView<int>& nodeIDs,
118  const Teuchos::ArrayView<LO>& localIDs,
119  const bool computeLIDs) const {
120  using Teuchos::Comm;
121  using Teuchos::RCP;
122  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
123  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid();
125 
126  RCP<const Comm<int> > comm = map.getComm();
127  const GO g_min = map.getMinAllGlobalIndex();
128 
129  // Let N_G be the global number of elements in the Map,
130  // and P be the number of processes in its communicator.
131  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
132  //
133  // The first R processes own N_L+1 elements.
134  // The remaining P-R processes own N_L elements.
135  //
136  // Let g be the current GID, g_min be the global minimum GID,
137  // and g_0 = g - g_min. If g is a valid GID in this Map, then
138  // g_0 is in [0, N_G - 1].
139  //
140  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
141  // the rank of the process that owns g is floor(g_0 / (N_L +
142  // 1)), and its corresponding local index on that process is g_0
143  // mod (N_L + 1).
144  //
145  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
146  // and g_0 >= R*(N_L + 1), then the rank of the process that
147  // owns g is then R + floor(g_R / N_L), and its corresponding
148  // local index on that process is g_R mod N_L.
149 
150  const size_type N_G =
151  static_cast<size_type>(map.getGlobalNumElements());
152  const size_type P = static_cast<size_type>(comm->getSize());
153  const size_type N_L = N_G / P;
154  const size_type R = N_G - N_L * P; // N_G mod P
155  const size_type N_R = R * (N_L + static_cast<size_type>(1));
156 
157 #ifdef HAVE_TPETRA_DEBUG
158  TEUCHOS_TEST_FOR_EXCEPTION(
159  N_G != P * N_L + R, std::logic_error,
160  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
161  "N_G = "
162  << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
163  << " = " << P * N_L + R << ". "
164  "Please report this bug to the Tpetra developers.");
165 #endif // HAVE_TPETRA_DEBUG
166 
167  const size_type numGids = globalIDs.size(); // for const loop bound
168  // Avoid signed/unsigned comparisons below, in case GO is
169  // unsigned. (Integer literals are generally signed.)
170  const GO ONE = static_cast<GO>(1);
171 
172  if (computeLIDs) {
173  for (size_type k = 0; k < numGids; ++k) {
174  const GO g_0 = globalIDs[k] - g_min;
175 
176  // The first test is a little strange just in case GO is
177  // unsigned. Compilers raise a warning on tests like "x <
178  // 0" if x is unsigned, but don't usually raise a warning if
179  // the expression is a bit more complicated than that.
180  if (g_0 + ONE < ONE || g_0 >= static_cast<GO>(N_G)) {
181  nodeIDs[k] = -1;
182  localIDs[k] = invalidLid;
183  res = IDNotPresent;
184  } else if (g_0 < static_cast<GO>(N_R)) {
185  // The GID comes from the initial sequence of R processes.
186  nodeIDs[k] = static_cast<int>(g_0 / static_cast<GO>(N_L + 1));
187  localIDs[k] = static_cast<LO>(g_0 % static_cast<GO>(N_L + 1));
188  } else if (g_0 >= static_cast<GO>(N_R)) {
189  // The GID comes from the remaining P-R processes.
190  const GO g_R = g_0 - static_cast<GO>(N_R);
191  nodeIDs[k] = static_cast<int>(R + g_R / N_L);
192  localIDs[k] = static_cast<int>(g_R % N_L);
193  }
194 #ifdef HAVE_TPETRA_DEBUG
195  else {
196  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
197  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
198  "should never get here. "
199  "Please report this bug to the Tpetra developers.");
200  }
201 #endif // HAVE_TPETRA_DEBUG
202  }
203  } else { // don't compute local indices
204  for (size_type k = 0; k < numGids; ++k) {
205  const GO g_0 = globalIDs[k] - g_min;
206  // The first test is a little strange just in case GO is
207  // unsigned. Compilers raise a warning on tests like "x <
208  // 0" if x is unsigned, but don't usually raise a warning if
209  // the expression is a bit more complicated than that.
210  if (g_0 + ONE < ONE || g_0 >= static_cast<GO>(N_G)) {
211  nodeIDs[k] = -1;
212  res = IDNotPresent;
213  } else if (g_0 < static_cast<GO>(N_R)) {
214  // The GID comes from the initial sequence of R processes.
215  nodeIDs[k] = static_cast<int>(g_0 / static_cast<GO>(N_L + 1));
216  } else if (g_0 >= static_cast<GO>(N_R)) {
217  // The GID comes from the remaining P-R processes.
218  const GO g_R = g_0 - static_cast<GO>(N_R);
219  nodeIDs[k] = static_cast<int>(R + g_R / N_L);
220  }
221 #ifdef HAVE_TPETRA_DEBUG
222  else {
223  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
224  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
225  "should never get here. "
226  "Please report this bug to the Tpetra developers.");
227  }
228 #endif // HAVE_TPETRA_DEBUG
229  }
230  }
231  return res;
232 }
233 
234 template <class LO, class GO, class NT>
236  DistributedContiguousDirectory(const map_type& map) {
237  using Teuchos::arcp;
238  using Teuchos::gatherAll;
239  using Teuchos::RCP;
240 
241  RCP<const Teuchos::Comm<int> > comm = map.getComm();
242 
243  TEUCHOS_TEST_FOR_EXCEPTION(!map.isDistributed(), std::invalid_argument,
244  Teuchos::typeName(*this) << " constructor: Map is not distributed.");
245  TEUCHOS_TEST_FOR_EXCEPTION(!map.isContiguous(), std::invalid_argument,
246  Teuchos::typeName(*this) << " constructor: Map is not contiguous.");
247 
248  const int numProcs = comm->getSize();
249 
250  // Make room for the min global ID on each process, plus one
251  // entry at the end for the "max cap."
252  allMinGIDs_ = arcp<GO>(numProcs + 1);
253  // Get my process' min global ID.
254  GO minMyGID = map.getMinGlobalIndex();
255  // Gather all of the min global IDs into the first numProcs
256  // entries of allMinGIDs_.
257 
258  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
259  //
260  // The purpose of this giant hack is that gatherAll appears to
261  // interpret the "receive count" argument differently than
262  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
263  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
264  // which could relate either to this, or to OpenMPI.
265 #ifdef HAVE_TPETRACORE_MPI
266  MPI_Datatype rawMpiType = MPI_INT;
267  bool useRawMpi = true;
268  if (typeid(GO) == typeid(int)) {
269  rawMpiType = MPI_INT;
270  } else if (typeid(GO) == typeid(long)) {
271  rawMpiType = MPI_LONG;
272  } else {
273  useRawMpi = false;
274  }
275  if (useRawMpi) {
276  using Teuchos::MpiComm;
277  using Teuchos::rcp_dynamic_cast;
278  RCP<const MpiComm<int> > mpiComm =
279  rcp_dynamic_cast<const MpiComm<int> >(comm);
280  // It could be a SerialComm instead, even in an MPI build, so
281  // be sure to check.
282  if (!comm.is_null()) {
283  MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
284  const int err =
285  MPI_Allgather(&minMyGID, 1, rawMpiType,
286  allMinGIDs_.getRawPtr(), 1, rawMpiType,
287  rawMpiComm);
288  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
289  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
290  } else {
291  gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
292  }
293  } else {
294  gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
295  }
296 #else // NOT HAVE_TPETRACORE_MPI
297  gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
298 #endif // HAVE_TPETRACORE_MPI
299  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
300 
301  // gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
302 
303  // Put the max cap at the end. Adding one lets us write loops
304  // over the global IDs with the usual strict less-than bound.
305  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex() + Teuchos::OrdinalTraits<GO>::one();
306 }
307 
308 template <class LO, class GO, class NT>
309 std::string
311  std::ostringstream os;
312  os << "DistributedContiguousDirectory"
313  << "<" << Teuchos::TypeNameTraits<LO>::name()
314  << ", " << Teuchos::TypeNameTraits<GO>::name()
315  << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
316  return os.str();
317 }
318 
319 template <class LO, class GO, class NT>
323  const Teuchos::ArrayView<const GO>& globalIDs,
324  const Teuchos::ArrayView<int>& nodeIDs,
325  const Teuchos::ArrayView<LO>& localIDs,
326  const bool computeLIDs) const {
327  using Teuchos::Array;
328  using Teuchos::ArrayRCP;
329  using Teuchos::ArrayView;
330  using Teuchos::as;
331  using Teuchos::Comm;
332  using Teuchos::RCP;
333 
335  RCP<const Teuchos::Comm<int> > comm = map.getComm();
336  const int myRank = comm->getRank();
337 
338  // Map is on one process or is locally replicated.
339  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
340  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
341  typename ArrayView<const GO>::iterator gidIter;
342  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
343  if (map.isNodeGlobalElement(*gidIter)) {
344  *procIter++ = myRank;
345  if (computeLIDs) {
346  *lidIter++ = map.getLocalElement(*gidIter);
347  }
348  } else {
349  // Advance the pointers, leaving these values set to invalid
350  procIter++;
351  if (computeLIDs) {
352  lidIter++;
353  }
354  res = IDNotPresent;
355  }
356  }
357  return res;
358 }
359 
360 template <class LO, class GO, class NT>
364  const Teuchos::ArrayView<const GO>& globalIDs,
365  const Teuchos::ArrayView<int>& nodeIDs,
366  const Teuchos::ArrayView<LO>& localIDs,
367  const bool computeLIDs) const {
368  using Teuchos::Array;
369  using Teuchos::ArrayRCP;
370  using Teuchos::ArrayView;
371  using Teuchos::as;
372  using Teuchos::Comm;
373  using Teuchos::RCP;
374 
375  RCP<const Teuchos::Comm<int> > comm = map.getComm();
376  const int numProcs = comm->getSize();
377  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
378  const GO noGIDsOnProc = std::numeric_limits<GO>::max();
380 
381  // Find the first initialized GID for search below
382  int firstProcWithGIDs;
383  for (firstProcWithGIDs = 0; firstProcWithGIDs < numProcs;
384  firstProcWithGIDs++) {
385  if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
386  }
387 
388  // If Map is empty, return invalid values for all requested lookups
389  // This case should not happen, as an empty Map is not considered
390  // Distributed.
391  if (firstProcWithGIDs == numProcs) {
392  // No entries in Map
393  res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
394  std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
395  if (computeLIDs)
396  std::fill(localIDs.begin(), localIDs.end(), LINVALID);
397  return res;
398  }
399 
400  const GO one = as<GO>(1);
401  const GO nOverP = as<GO>(map.getGlobalNumElements() / as<global_size_t>(numProcs - firstProcWithGIDs));
402 
403  // Map is distributed but contiguous.
404  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
405  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
406  typename ArrayView<const GO>::iterator gidIter;
407  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
408  LO LID = LINVALID; // Assume not found until proven otherwise
409  int image = -1;
410  GO GID = *gidIter;
411  // Guess uniform distribution (TODO: maybe replace by a binary search)
412  // We go through all this trouble to avoid overflow and
413  // signed / unsigned casting mistakes (that were made in
414  // previous versions of this code).
415  int curRank;
416  const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
417  curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
418 
419  // This while loop will stop because
420  // allMinGIDs_[np] == global num elements
421  while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
422 
423  bool badGID = false;
424  while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
425  curRank--;
426  while (curRank >= firstProcWithGIDs &&
427  allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
428  }
429  if (curRank < firstProcWithGIDs) {
430  // GID is lower than all GIDs in map
431  badGID = true;
432  } else if (curRank == numProcs) {
433  // GID is higher than all GIDs in map
434  badGID = true;
435  } else {
436  // we have the lower bound; now limit from above
437  int above = curRank + 1;
438  while (allMinGIDs_[above] == noGIDsOnProc) above++;
439 
440  while (GID >= allMinGIDs_[above]) {
441  curRank = above;
442  if (curRank == numProcs) {
443  // GID is higher than all GIDs in map
444  badGID = true;
445  break;
446  }
447  above++;
448  while (allMinGIDs_[above] == noGIDsOnProc) above++;
449  }
450  }
451 
452  if (!badGID) {
453  image = curRank;
454  LID = as<LO>(GID - allMinGIDs_[image]);
455  } else {
456  res = IDNotPresent;
457  }
458  *procIter++ = image;
459  if (computeLIDs) {
460  *lidIter++ = LID;
461  }
462  }
463  return res;
464 }
465 
466 template <class LO, class GO, class NT>
468  DistributedNoncontiguousDirectory(const map_type& map)
469  : oneToOneResult_(ONE_TO_ONE_NOT_CALLED_YET)
470  , // to be revised below
471  locallyOneToOne_(true)
472  , // to be revised below
473  useHashTables_(false) // to be revised below
474 {
475  initialize(map, Teuchos::null);
476 }
477 
478 template <class LO, class GO, class NT>
479 DistributedNoncontiguousDirectory<LO, GO, NT>::
480  DistributedNoncontiguousDirectory(const map_type& map,
481  const tie_break_type& tie_break)
482  : oneToOneResult_(ONE_TO_ONE_NOT_CALLED_YET)
483  , // to be revised below
484  locallyOneToOne_(true)
485  , // to be revised below
486  useHashTables_(false) // to be revised below
487 {
488  initialize(map, Teuchos::ptrFromRef(tie_break));
489 }
490 
491 template <class LO, class GO, class NT>
492 void DistributedNoncontiguousDirectory<LO, GO, NT>::
493  initialize(const map_type& map,
494  Teuchos::Ptr<const tie_break_type> tie_break) {
495  using std::cerr;
496  using std::endl;
497  using Teuchos::arcp;
498  using Teuchos::Array;
499  using Teuchos::ArrayRCP;
500  using Teuchos::ArrayView;
501  using Teuchos::as;
502  using Teuchos::RCP;
503  using Teuchos::rcp;
504  using Teuchos::typeName;
505  using Teuchos::TypeNameTraits;
506  typedef Array<int>::size_type size_type;
507 
508  // This class' implementation of getEntriesImpl() currently
509  // encodes the following assumptions:
510  //
511  // 1. global_size_t >= GO
512  // 2. global_size_t >= int
513  // 3. global_size_t >= LO
514  //
515  // We check these assumptions here.
516  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
517  std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
518  "global_size_t) = "
519  << sizeof(global_size_t) << " < sizeof(Global"
520  "Ordinal = "
521  << TypeNameTraits<LO>::name() << ") = " << sizeof(GO) << ".");
522  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
523  std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
524  "global_size_t) = "
525  << sizeof(global_size_t) << " < sizeof(int) = " << sizeof(int) << ".");
526  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
527  std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
528  "global_size_t) = "
529  << sizeof(global_size_t) << " < sizeof(Local"
530  "Ordinal = "
531  << TypeNameTraits<LO>::name() << ") = " << sizeof(LO) << ".");
532 
533  RCP<const Teuchos::Comm<int> > comm = map.getComm();
534  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
535  const GO minAllGID = map.getMinAllGlobalIndex();
536  const GO maxAllGID = map.getMaxAllGlobalIndex();
537 
538  // The "Directory Map" (see below) will have a range of elements
539  // from the minimum to the maximum GID of the user Map, and a
540  // minimum GID of minAllGID from the user Map. It doesn't
541  // actually have to store all those entries, though do beware of
542  // calling getLocalElementList on it (see Bug 5822).
543  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
544 
545  // We can't afford to replicate the whole directory on each
546  // process, so create the "Directory Map", a uniform contiguous
547  // Map that describes how we will distribute the directory over
548  // processes.
549  //
550  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
551  // the index base. The index base should be separate from the
552  // minimum GID.
553  directoryMap_ = rcp(new map_type(numGlobalEntries, minAllGID, comm,
554  GloballyDistributed));
555  // The number of Directory elements that my process owns.
556  const size_t dir_numMyEntries = directoryMap_->getLocalNumElements();
557 
558  // Fix for Bug 5822: If the input Map is "sparse," that is if
559  // the difference between the global min and global max GID is
560  // much larger than the global number of elements in the input
561  // Map, then it's possible that the Directory Map might have
562  // many more entries than the input Map on this process. This
563  // can cause memory scalability issues. In that case, we switch
564  // from the array-based implementation of Directory storage to
565  // the hash table - based implementation. We don't use hash
566  // tables all the time, because they are slower in the common
567  // case of a nonsparse Map.
568  //
569  // NOTE: This is a per-process decision. Some processes may use
570  // array-based storage, whereas others may use hash table -
571  // based storage.
572 
573  // A hash table takes a constant factor more space, more or
574  // less, than an array. Thus, it's not worthwhile, even in
575  // terms of memory usage, always to use a hash table.
576  // Furthermore, array lookups are faster than hash table
577  // lookups, so it may be worthwhile to use an array even if it
578  // takes more space. The "sparsity threshold" governs when to
579  // switch to a hash table - based implementation.
580  const size_t inverseSparsityThreshold = 10;
581  useHashTables_ =
582  (dir_numMyEntries >= inverseSparsityThreshold * map.getLocalNumElements());
583 
584  // Get list of process IDs that own the directory entries for the
585  // Map GIDs. These will be the targets of the sends that the
586  // Distributor will do.
587  const int myRank = comm->getRank();
588  const size_t numMyEntries = map.getLocalNumElements();
589  Array<int> sendImageIDs(numMyEntries);
590  ArrayView<const GO> myGlobalEntries = map.getLocalElementList();
591  // An ID not present in this lookup indicates that it lies outside
592  // of the range [minAllGID,maxAllGID] (from map_). this means
593  // something is wrong with map_, our fault.
594  const LookupStatus lookupStatus =
595  directoryMap_->getRemoteIndexList(myGlobalEntries, sendImageIDs);
596  TEUCHOS_TEST_FOR_EXCEPTION(
597  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this) << " constructor: the Directory Map could not find out where one or "
598  "more of my Map's indices should go. The input to getRemoteIndexList "
599  "is "
600  << Teuchos::toString(myGlobalEntries) << ", and the output is " << Teuchos::toString(sendImageIDs()) << ". The input Map itself has "
601  "the following entries on the calling process "
602  << map.getComm()->getRank() << ": " << Teuchos::toString(map.getLocalElementList()) << ", and has " << map.getGlobalNumElements() << " total global indices in [" << map.getMinAllGlobalIndex() << "," << map.getMaxAllGlobalIndex() << "]. The Directory Map has " << directoryMap_->getGlobalNumElements() << " total global indices in "
603  "["
604  << directoryMap_->getMinAllGlobalIndex() << "," << directoryMap_->getMaxAllGlobalIndex() << "], and the calling process "
605  "has GIDs ["
606  << directoryMap_->getMinGlobalIndex() << "," << directoryMap_->getMaxGlobalIndex() << "]. "
607  "This probably means there is a bug in Map or Directory. "
608  "Please report this bug to the Tpetra developers.");
609 
610  // Initialize the distributor using the list of process IDs to
611  // which to send. We'll use the distributor to send out triples
612  // of (GID, process ID, LID). We're sending the entries to the
613  // processes that the Directory Map says should own them, which is
614  // why we called directoryMap_->getRemoteIndexList() above.
615  Distributor distor(comm);
616  const size_t numReceives = distor.createFromSends(sendImageIDs);
617 
618  // NOTE (mfh 21 Mar 2012) The following code assumes that
619  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
620  //
621  // Create and fill buffer of (GID, PID, LID) triples to send
622  // out. We pack the (GID, PID, LID) triples into a single Array
623  // of GO, casting the PID from int to GO and the LID from LO to
624  // GO as we do so.
625  //
626  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
627  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
628  // required, and the latter is generally the case, but we should
629  // still check for this.
630  const int packetSize = 3; // We're sending triples, so packet size is 3.
631  Kokkos::View<GO*, Kokkos::HostSpace> exportEntries("exportEntries", packetSize * numMyEntries);
632  {
633  size_t exportIndex = 0;
634  for (size_t i = 0; i < numMyEntries; ++i) {
635  exportEntries[exportIndex++] = myGlobalEntries[i];
636  exportEntries[exportIndex++] = as<GO>(myRank);
637  exportEntries[exportIndex++] = as<GO>(i);
638  }
639  }
640  // Buffer of data to receive. The Distributor figured out for
641  // us how many packets we're receiving, when we called its
642  // createFromSends() method to set up the distribution plan.
643  Kokkos::View<GO*, Kokkos::HostSpace> importElements("importElements", packetSize * distor.getTotalReceiveLength());
644 
645  // Distribute the triples of (GID, process ID, LID).
646  distor.doPostsAndWaits(exportEntries, packetSize, importElements);
647 
648  // Unpack the redistributed data. Both implementations of
649  // Directory storage map from an LID in the Directory Map (which
650  // is the LID of the GID to store) to either a PID or an LID in
651  // the input Map. Each "packet" (contiguous chunk of
652  // importElements) contains a triple: (GID, PID, LID).
653  if (useHashTables_) {
654  // Create the hash tables. We know exactly how many elements
655  // to expect in each hash table. FixedHashTable's constructor
656  // currently requires all the keys and values at once, so we
657  // have to extract them in temporary arrays. It may be
658  // possible to rewrite FixedHashTable to use a "start fill" /
659  // "end fill" approach that avoids the temporary arrays, but
660  // we won't try that for now.
661 
662  // The constructors of Array and ArrayRCP that take a number
663  // of elements all initialize the arrays. Instead, allocate
664  // raw arrays, then hand them off to ArrayRCP, to avoid the
665  // initial unnecessary initialization without losing the
666  // benefit of exception safety (and bounds checking, in a
667  // debug build).
668  LO* tableKeysRaw = NULL;
669  LO* tableLidsRaw = NULL;
670  int* tablePidsRaw = NULL;
671  try {
672  tableKeysRaw = new LO[numReceives];
673  tableLidsRaw = new LO[numReceives];
674  tablePidsRaw = new int[numReceives];
675  } catch (...) {
676  if (tableKeysRaw != NULL) {
677  delete[] tableKeysRaw;
678  }
679  if (tableLidsRaw != NULL) {
680  delete[] tableLidsRaw;
681  }
682  if (tablePidsRaw != NULL) {
683  delete[] tablePidsRaw;
684  }
685  throw;
686  }
687  ArrayRCP<LO> tableKeys(tableKeysRaw, 0, numReceives, true);
688  ArrayRCP<LO> tableLids(tableLidsRaw, 0, numReceives, true);
689  ArrayRCP<int> tablePids(tablePidsRaw, 0, numReceives, true);
690 
691  if (tie_break.is_null()) {
692  // Fill the temporary arrays of keys and values.
693  size_type importIndex = 0;
694  for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
695  const GO curGID = importElements[importIndex++];
696  const LO curLID = directoryMap_->getLocalElement(curGID);
697  TEUCHOS_TEST_FOR_EXCEPTION(
698  curLID == LINVALID, std::logic_error,
699  Teuchos::typeName(*this) << " constructor: Incoming global index "
700  << curGID << " does not have a corresponding local index in the "
701  "Directory Map. Please report this bug to the Tpetra developers.");
702  tableKeys[i] = curLID;
703  tablePids[i] = importElements[importIndex++];
704  tableLids[i] = importElements[importIndex++];
705  }
706  // Set up the hash tables. The hash tables' constructor
707  // detects whether there are duplicates, so that we can set
708  // locallyOneToOne_.
709  lidToPidTable_ =
710  rcp(new lidToPidTable_type(tableKeys(), tablePids()));
711  locallyOneToOne_ = !(lidToPidTable_->hasDuplicateKeys());
712  lidToLidTable_ =
713  rcp(new lidToLidTable_type(tableKeys(), tableLids()));
714  } else { // tie_break is NOT null
715 
716  // For each directory Map LID received, collect all the
717  // corresponding (PID,LID) pairs. If the input Map is not
718  // one-to-one, corresponding directory Map LIDs will have
719  // more than one pair. In that case, we will use the
720  // TieBreak object to pick exactly one pair.
721  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
722  pair_table_type ownedPidLidPairs;
723 
724  // For each directory Map LID received, collect the zero or
725  // more input Map (PID,LID) pairs into ownedPidLidPairs.
726  size_type importIndex = 0;
727  for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
728  const GO curGID = importElements[importIndex++];
729  const LO dirMapLid = directoryMap_->getLocalElement(curGID);
730  TEUCHOS_TEST_FOR_EXCEPTION(
731  dirMapLid == LINVALID, std::logic_error,
732  Teuchos::typeName(*this) << " constructor: Incoming global index "
733  << curGID << " does not have a corresponding local index in the "
734  "Directory Map. Please report this bug to the Tpetra developers.");
735  tableKeys[i] = dirMapLid;
736  const int PID = importElements[importIndex++];
737  const int LID = importElements[importIndex++];
738 
739  // These may change below. We fill them in just to ensure
740  // that they won't have invalid values.
741  tablePids[i] = PID;
742  tableLids[i] = LID;
743 
744  // For every directory Map LID, we have to remember all
745  // (PID, LID) pairs. The TieBreak object will arbitrate
746  // between them in the loop below.
747  ownedPidLidPairs[dirMapLid].push_back(std::make_pair(PID, LID));
748  }
749 
750  // Use TieBreak to arbitrate between (PID,LID) pairs
751  // corresponding to each directory Map LID.
752  //
753  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
754  // as the directory Map LID?
755  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
756  // contiguous, but the directory map is. Need to set tablePids[i]
757  // and tableLids[i], so need to loop over numReceives (as that is
758  // how those arrays are allocated). FIXED
759 
760  for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
761  const LO dirMapLid = tableKeys[i];
762  const std::vector<std::pair<int, LO> >& pidLidList =
763  ownedPidLidPairs[dirMapLid];
764  const size_t listLen = pidLidList.size();
765  if (listLen == 0) continue; // KDD This will never happen
766  const GO dirMapGid = directoryMap_->getGlobalElement(dirMapLid);
767  if (listLen > 1) {
768  locallyOneToOne_ = false;
769  }
770  // If there is some (PID,LID) pair for the current input
771  // Map LID, then it makes sense to invoke the TieBreak
772  // object to arbitrate between the options. Even if
773  // there is only one (PID,LID) pair, we still want to
774  // give the TieBreak object a chance to do whatever it
775  // likes to do, in terms of side effects (e.g., track
776  // (PID,LID) pairs).
777  const size_type index =
778  static_cast<size_type>(tie_break->selectedIndex(dirMapGid,
779  pidLidList));
780  tablePids[i] = pidLidList[index].first;
781  tableLids[i] = pidLidList[index].second;
782  }
783 
784  // Set up the hash tables.
785  lidToPidTable_ =
786  rcp(new lidToPidTable_type(tableKeys(), tablePids()));
787  lidToLidTable_ =
788  rcp(new lidToLidTable_type(tableKeys(), tableLids()));
789  }
790  } else {
791  if (tie_break.is_null()) {
792  // Use array-based implementation of Directory storage.
793  // Allocate these arrays and fill them with invalid values,
794  // in case the input Map's GID list is sparse (i.e., does
795  // not populate all GIDs from minAllGID to maxAllGID).
796  PIDs_ = arcp<int>(dir_numMyEntries);
797  std::fill(PIDs_.begin(), PIDs_.end(), -1);
798  LIDs_ = arcp<LO>(dir_numMyEntries);
799  std::fill(LIDs_.begin(), LIDs_.end(), LINVALID);
800  // Fill in the arrays with PIDs resp. LIDs.
801  size_type importIndex = 0;
802  for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
803  const GO curGID = importElements[importIndex++];
804  const LO curLID = directoryMap_->getLocalElement(curGID);
805  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
806  Teuchos::typeName(*this) << " constructor: Incoming global index "
807  << curGID << " does not have a corresponding local index in the "
808  "Directory Map. Please report this bug to the Tpetra developers.");
809 
810  // If PIDs_[curLID] is not -1, then curGID is a duplicate
811  // on the calling process, so the Directory is not locally
812  // one-to-one.
813  if (PIDs_[curLID] != -1) {
814  locallyOneToOne_ = false;
815  }
816  PIDs_[curLID] = importElements[importIndex++];
817  LIDs_[curLID] = importElements[importIndex++];
818  }
819  } else {
820  PIDs_ = arcp<int>(dir_numMyEntries);
821  LIDs_ = arcp<LO>(dir_numMyEntries);
822  std::fill(PIDs_.begin(), PIDs_.end(), -1);
823 
824  // All received (PID, LID) pairs go into ownedPidLidPairs.
825  // This is a map from the directory Map's LID to the (PID,
826  // LID) pair (where the latter LID comes from the input Map,
827  // not the directory Map). If the input Map is not
828  // one-to-one, corresponding LIDs will have
829  // ownedPidLidPairs[curLID].size() > 1. In that case, we
830  // will use the TieBreak object to pick exactly one pair.
831  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs(dir_numMyEntries);
832  size_t importIndex = 0;
833  for (size_t i = 0; i < numReceives; ++i) {
834  const GO GID = importElements[importIndex++];
835  const int PID = importElements[importIndex++];
836  const LO LID = importElements[importIndex++];
837 
838  const LO dirMapLid = directoryMap_->getLocalElement(GID);
839  TEUCHOS_TEST_FOR_EXCEPTION(
840  dirMapLid == LINVALID, std::logic_error,
841  Teuchos::typeName(*this) << " constructor: Incoming global index "
842  << GID << " does not have a corresponding local index in the "
843  "Directory Map. Please report this bug to the Tpetra developers.");
844  ownedPidLidPairs[dirMapLid].push_back(std::make_pair(PID, LID));
845  }
846 
847  // Use TieBreak to arbitrate between (PID,LID) pairs
848  // corresponding to each directory Map LID.
849  //
850  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
851  // as the directory Map LID?
852  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
853  // contiguous. Loop over all ownedPidLidPairs; skip those that have
854  // empty lists. FIXED
855 
856  for (size_t i = 0; i < dir_numMyEntries; ++i) {
857  const std::vector<std::pair<int, LO> >& pidLidList =
858  ownedPidLidPairs[i];
859  const size_t listLen = pidLidList.size();
860  if (listLen == 0) continue; // KDD will happen for GIDs not in
861  // KDD the user's source map
862  const LO dirMapLid = static_cast<LO>(i);
863  const GO dirMapGid = directoryMap_->getGlobalElement(dirMapLid);
864  if (listLen > 1) {
865  locallyOneToOne_ = false;
866  }
867  // If there is some (PID,LID) pair for the current input
868  // Map LID, then it makes sense to invoke the TieBreak
869  // object to arbitrate between the options. Even if
870  // there is only one (PID,LID) pair, we still want to
871  // give the TieBreak object a chance to do whatever it
872  // likes to do, in terms of side effects (e.g., track
873  // (PID,LID) pairs).
874  const size_type index =
875  static_cast<size_type>(tie_break->selectedIndex(dirMapGid,
876  pidLidList));
877  PIDs_[i] = pidLidList[index].first;
878  LIDs_[i] = pidLidList[index].second;
879  }
880  }
881  }
882 }
883 
884 template <class LO, class GO, class NT>
885 std::string
887  std::ostringstream os;
888  os << "DistributedNoncontiguousDirectory"
889  << "<" << Teuchos::TypeNameTraits<LO>::name()
890  << ", " << Teuchos::TypeNameTraits<GO>::name()
891  << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
892  return os.str();
893 }
894 
895 template <class LO, class GO, class NT>
898  getEntriesImpl(const map_type& map,
899  const Teuchos::ArrayView<const GO>& globalIDs,
900  const Teuchos::ArrayView<int>& nodeIDs,
901  const Teuchos::ArrayView<LO>& localIDs,
902  const bool computeLIDs) const {
903  using Details::Behavior;
905  using std::cerr;
906  using std::endl;
907  using Teuchos::Array;
908  using Teuchos::ArrayRCP;
909  using Teuchos::ArrayView;
910  using Teuchos::as;
911  using Teuchos::RCP;
912  using Teuchos::toString;
913  using size_type = typename Array<GO>::size_type;
914  const char funcPrefix[] =
915  "Tpetra::"
916  "DistributedNoncontiguousDirectory::getEntriesImpl: ";
917  const char errSuffix[] =
918  " Please report this bug to the Tpetra developers.";
919 
920  RCP<const Teuchos::Comm<int> > comm = map.getComm();
921  const bool verbose = Behavior::verbose("Directory") ||
922  Behavior::verbose("Tpetra::Directory");
923  const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
924 
925  std::unique_ptr<std::string> procPrefix;
926  if (verbose) {
927  std::ostringstream os;
928  os << "Proc ";
929  if (map.getComm().is_null()) {
930  os << "?";
931  } else {
932  os << map.getComm()->getRank();
933  }
934  os << ": ";
935  procPrefix = std::unique_ptr<std::string>(
936  new std::string(os.str()));
937  os << funcPrefix << "{";
938  verbosePrintArray(os, globalIDs, "GIDs", maxNumToPrint);
939  os << ", ";
940  verbosePrintArray(os, nodeIDs, "PIDs", maxNumToPrint);
941  os << ", ";
942  verbosePrintArray(os, localIDs, "LIDs", maxNumToPrint);
943  os << ", computeLIDs: "
944  << (computeLIDs ? "true" : "false") << "}" << endl;
945  cerr << os.str();
946  }
947 
948  const size_t numEntries = globalIDs.size();
949  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
951 
952  //
953  // Set up directory structure.
954  //
955 
956  // If we're computing LIDs, we also have to include them in each
957  // packet, along with the GID and process ID.
958  const int packetSize = computeLIDs ? 3 : 2;
959 
960  // For data distribution, we use: Surprise! A Distributor!
961  Distributor distor(comm);
962 
963  // Get directory locations for the requested list of entries.
964  Array<int> dirImages(numEntries);
965  if (verbose) {
966  std::ostringstream os;
967  os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
968  << endl;
969  cerr << os.str();
970  }
971  res = directoryMap_->getRemoteIndexList(globalIDs, dirImages());
972  if (verbose) {
973  std::ostringstream os;
974  os << *procPrefix << "Director Map getRemoteIndexList out ";
975  verbosePrintArray(os, dirImages, "PIDs", maxNumToPrint);
976  os << endl;
977  cerr << os.str();
978  }
979 
980  // Check for unfound globalIDs and set corresponding nodeIDs to -1
981  size_t numMissing = 0;
982  if (res == IDNotPresent) {
983  for (size_t i = 0; i < numEntries; ++i) {
984  if (dirImages[i] == -1) {
985  nodeIDs[i] = -1;
986  if (computeLIDs) {
987  localIDs[i] = LINVALID;
988  }
989  numMissing++;
990  }
991  }
992  }
993 
994  Array<GO> sendGIDs;
995  Array<int> sendImages;
996  if (verbose) {
997  std::ostringstream os;
998  os << *procPrefix << "Call Distributor::createFromRecvs"
999  << endl;
1000  cerr << os.str();
1001  }
1002  distor.createFromRecvs(globalIDs, dirImages(), sendGIDs, sendImages);
1003  if (verbose) {
1004  std::ostringstream os;
1005  os << *procPrefix << "Distributor::createFromRecvs result: "
1006  << "{";
1007  verbosePrintArray(os, sendGIDs, "sendGIDs", maxNumToPrint);
1008  os << ", ";
1009  verbosePrintArray(os, sendImages, "sendPIDs", maxNumToPrint);
1010  os << "}" << endl;
1011  cerr << os.str();
1012  }
1013  const size_type numSends = sendGIDs.size();
1014 
1015  //
1016  // mfh 13 Nov 2012:
1017  //
1018  // The code below temporarily stores LO, GO, and int values in
1019  // an array of global_size_t. If one of the signed types (LO
1020  // and GO should both be signed) happened to be -1 (or some
1021  // negative number, but -1 is the one that came up today), then
1022  // conversion to global_size_t will result in a huge
1023  // global_size_t value, and thus conversion back may overflow.
1024  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1025  // all along.)
1026  //
1027  // The overflow normally would not be a problem, since it would
1028  // just go back to -1 again. However, Teuchos::as does range
1029  // checking on conversions in a debug build, so it throws an
1030  // exception (std::range_error) in this case. Range checking is
1031  // generally useful in debug mode, so we don't want to disable
1032  // this behavior globally.
1033  //
1034  // We solve this problem by forgoing use of Teuchos::as for the
1035  // conversions below from LO, GO, or int to global_size_t, and
1036  // the later conversions back from global_size_t to LO, GO, or
1037  // int.
1038  //
1039  // I've recorded this discussion as Bug 5760.
1040  //
1041 
1042  // global_size_t >= GO
1043  // global_size_t >= size_t >= int
1044  // global_size_t >= size_t >= LO
1045  // Therefore, we can safely store all of these in a global_size_t
1046  Kokkos::View<global_size_t*, Kokkos::HostSpace> exports("exports", packetSize * numSends);
1047  {
1048  // Packet format:
1049  // - If computing LIDs: (GID, PID, LID)
1050  // - Otherwise: (GID, PID)
1051  //
1052  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1053 
1054  // Current position to which to write in exports array. If
1055  // sending pairs, we pack the (GID, PID) pair for gid =
1056  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1057  // triples, we pack the (GID, PID, LID) pair for gid =
1058  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1059  size_t exportsIndex = 0;
1060 
1061  if (useHashTables_) {
1062  if (verbose) {
1063  std::ostringstream os;
1064  os << *procPrefix << "Pack exports (useHashTables_ true)"
1065  << endl;
1066  cerr << os.str();
1067  }
1068  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1069  const GO curGID = sendGIDs[gidIndex];
1070  // Don't use as() here (see above note).
1071  exports[exportsIndex++] = static_cast<global_size_t>(curGID);
1072  const LO curLID = directoryMap_->getLocalElement(curGID);
1073  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error, funcPrefix << "Directory Map's global index " << curGID << " lacks "
1074  "a corresponding local index."
1075  << errSuffix);
1076  // Don't use as() here (see above note).
1077  exports[exportsIndex++] =
1078  static_cast<global_size_t>(lidToPidTable_->get(curLID));
1079  if (computeLIDs) {
1080  // Don't use as() here (see above note).
1081  exports[exportsIndex++] =
1082  static_cast<global_size_t>(lidToLidTable_->get(curLID));
1083  }
1084  }
1085  } else {
1086  if (verbose) {
1087  std::ostringstream os;
1088  os << *procPrefix << "Pack exports (useHashTables_ false)"
1089  << endl;
1090  cerr << os.str();
1091  }
1092  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1093  const GO curGID = sendGIDs[gidIndex];
1094  // Don't use as() here (see above note).
1095  exports[exportsIndex++] = static_cast<global_size_t>(curGID);
1096  const LO curLID = directoryMap_->getLocalElement(curGID);
1097  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error, funcPrefix << "Directory Map's global index " << curGID << " lacks "
1098  "a corresponding local index."
1099  << errSuffix);
1100  // Don't use as() here (see above note).
1101  exports[exportsIndex++] =
1102  static_cast<global_size_t>(PIDs_[curLID]);
1103  if (computeLIDs) {
1104  // Don't use as() here (see above note).
1105  exports[exportsIndex++] =
1106  static_cast<global_size_t>(LIDs_[curLID]);
1107  }
1108  }
1109  }
1110 
1111  TEUCHOS_TEST_FOR_EXCEPTION(exportsIndex > exports.size(), std::logic_error,
1112  funcPrefix << "On Process " << comm->getRank() << ", "
1113  "exportsIndex = "
1114  << exportsIndex << " > exports.size() = "
1115  << exports.size() << "." << errSuffix);
1116  }
1117 
1118  TEUCHOS_TEST_FOR_EXCEPTION(numEntries < numMissing, std::logic_error, funcPrefix << "On Process " << comm->getRank() << ", numEntries = " << numEntries << " < numMissing = " << numMissing << "." << errSuffix);
1119 
1120  //
1121  // mfh 13 Nov 2012: See note above on conversions between
1122  // global_size_t and LO, GO, or int.
1123  //
1124  const size_t numRecv = numEntries - numMissing;
1125 
1126  {
1127  const size_t importLen = packetSize * distor.getTotalReceiveLength();
1128  const size_t requiredImportLen = numRecv * packetSize;
1129  const int myRank = comm->getRank();
1130  TEUCHOS_TEST_FOR_EXCEPTION(
1131  importLen < requiredImportLen, std::logic_error,
1132  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1133  "On Process "
1134  << myRank << ": The 'imports' array must have length "
1135  "at least "
1136  << requiredImportLen << ", but its actual length is " << importLen << ". numRecv: " << numRecv << ", packetSize: " << packetSize << ", numEntries (# GIDs): " << numEntries << ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1137  << distor.getTotalReceiveLength() << ". " << std::endl
1138  << "Distributor description: " << distor.description() << ". "
1139  << std::endl
1140  << "Please report this bug to the Tpetra developers.");
1141  }
1142 
1143  Kokkos::View<global_size_t*, Kokkos::HostSpace> imports("imports", packetSize * distor.getTotalReceiveLength());
1144  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1145  // with communication, by splitting this call into doPosts and
1146  // doWaits. The code is still correct in this form, however.
1147  if (verbose) {
1148  std::ostringstream os;
1149  os << *procPrefix << "Call doPostsAndWaits: {"
1150  << "packetSize: " << packetSize << ", ";
1151  verbosePrintArray(os, exports, "exports", maxNumToPrint);
1152  os << "}" << endl;
1153  cerr << os.str();
1154  }
1155  distor.doPostsAndWaits(exports, packetSize, imports);
1156  if (verbose) {
1157  std::ostringstream os;
1158  os << *procPrefix << "doPostsAndWaits result: ";
1159  verbosePrintArray(os, imports, "imports", maxNumToPrint);
1160  os << endl;
1161  cerr << os.str();
1162  }
1163 
1164  Array<GO> sortedIDs(globalIDs); // deep copy (for later sorting)
1165  Array<GO> offset(numEntries); // permutation array (sort2 output)
1166  for (GO ii = 0; ii < static_cast<GO>(numEntries); ++ii) {
1167  offset[ii] = ii;
1168  }
1169  sort2(sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1170  if (verbose) {
1171  std::ostringstream os;
1172  os << *procPrefix;
1173  verbosePrintArray(os, sortedIDs, "sortedIDs", maxNumToPrint);
1174  os << ", ";
1175  verbosePrintArray(os, offset, "offset", maxNumToPrint);
1176  os << endl;
1177  cerr << os.str();
1178  }
1179 
1180  size_t importsIndex = 0;
1181 
1182  // we know these conversions are in range, because we loaded this data
1183  //
1184  // Don't use as() for conversions here; we know they are in range.
1185  for (size_t i = 0; i < numRecv; ++i) {
1186  const GO curGID = static_cast<GO>(imports[importsIndex++]);
1187  auto p1 = std::equal_range(sortedIDs.begin(),
1188  sortedIDs.end(), curGID);
1189  if (p1.first != p1.second) {
1190  const size_t j = p1.first - sortedIDs.begin();
1191  nodeIDs[offset[j]] =
1192  static_cast<int>(imports[importsIndex++]);
1193  if (computeLIDs) {
1194  localIDs[offset[j]] =
1195  static_cast<LO>(imports[importsIndex++]);
1196  }
1197  if (nodeIDs[offset[j]] == -1) {
1198  res = IDNotPresent;
1199  }
1200  }
1201  }
1202 
1203  TEUCHOS_TEST_FOR_EXCEPTION(size_t(importsIndex) > size_t(imports.size()),
1204  std::logic_error, funcPrefix << "On Process " << comm->getRank() << ": importsIndex = " << importsIndex << " > imports.size() = " << imports.size() << ". "
1205  "numRecv: "
1206  << numRecv << ", packetSize: " << packetSize << ", "
1207  "numEntries (# GIDs): "
1208  << numEntries << ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): " << distor.getTotalReceiveLength() << "." << errSuffix);
1209  if (verbose) {
1210  std::ostringstream os;
1211  os << *procPrefix << funcPrefix << "Done!" << endl;
1212  cerr << os.str();
1213  }
1214  return res;
1215 }
1216 
1217 template <class LO, class GO, class NT>
1219  isOneToOne(const Teuchos::Comm<int>& comm) const {
1220  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1221  const int lcl121 = isLocallyOneToOne() ? 1 : 0;
1222  int gbl121 = 0;
1223  Teuchos::reduceAll<int, int>(comm, Teuchos::REDUCE_MIN, lcl121,
1224  Teuchos::outArg(gbl121));
1225  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1226  }
1227  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1228 }
1229 } // namespace Details
1230 } // namespace Tpetra
1231 
1232 //
1233 // Explicit instantiation macro
1234 //
1235 // Must be expanded from within the Tpetra namespace!
1236 //
1237 #define TPETRA_DIRECTORYIMPL_INSTANT(LO, GO, NODE) \
1238  namespace Details { \
1239  template class Directory<LO, GO, NODE>; \
1240  template class ReplicatedDirectory<LO, GO, NODE>; \
1241  template class ContiguousUniformDirectory<LO, GO, NODE>; \
1242  template class DistributedContiguousDirectory<LO, GO, NODE>; \
1243  template class DistributedNoncontiguousDirectory<LO, GO, NODE>; \
1244  }
1245 
1246 #endif // TPETRA_DIRECTORYIMPL_DEF_HPP
void initialize(int *argc, char ***argv)
Initialize Tpetra.
std::string description() const override
A one-line human-readable description of this object.
Interface for breaking ties in ownership.
std::string description() const
Return a one-line description of this object.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
Implementation of Directory for a distributed contiguous Map.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
A parallel distribution of indices over processes.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
std::enable_if<(Kokkos::is_view< ExpView >::value &&Kokkos::is_view< ImpView >::value)>::type doPostsAndWaits(const ExpView &exports, size_t numPackets, const ImpView &imports)
Execute the (forward) communication plan.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Stand-alone utility functions and macros.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Implementation of Directory for a contiguous, uniformly distributed Map.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
std::string description() const override
A one-line human-readable description of this object.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.