Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
11 #define TPETRA_DIRECTORYIMPL_DEF_HPP
12 
15 
16 #include "Tpetra_Distributor.hpp"
17 #include "Tpetra_Map.hpp"
18 #include "Tpetra_TieBreak.hpp"
19 #include "Tpetra_Util.hpp"
20 #include "Tpetra_Details_FixedHashTable.hpp"
21 #include "Teuchos_Comm.hpp"
22 #include <memory>
23 #include <sstream>
24 
25 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
26 #ifdef HAVE_TPETRACORE_MPI
27 # include <mpi.h>
28 #endif // HAVE_TPETRACORE_MPI
29 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
30 
31 namespace Tpetra {
32  namespace Details {
33  template<class LO, class GO, class NT>
36  getEntries (const map_type& map,
37  const Teuchos::ArrayView<const GO> &globalIDs,
38  const Teuchos::ArrayView<int> &nodeIDs,
39  const Teuchos::ArrayView<LO> &localIDs,
40  const bool computeLIDs) const
41  {
42  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
43  // all have the same size, before modifying any output arguments.
44  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
45  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
46  "Output arrays do not have the right sizes. nodeIDs.size() = "
47  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
48  << ".");
49  TEUCHOS_TEST_FOR_EXCEPTION(
50  computeLIDs && localIDs.size() != globalIDs.size(),
51  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
52  "Output array do not have the right sizes. localIDs.size() = "
53  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
54  << ".");
55 
56  // Initially, fill nodeIDs and localIDs (if applicable) with
57  // invalid values. The "invalid" process ID is -1 (this means
58  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
59  // "invalid" process ID); the invalid local ID comes from
60  // OrdinalTraits.
61  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
62  if (computeLIDs) {
63  std::fill (localIDs.begin(), localIDs.end(),
64  Teuchos::OrdinalTraits<LO>::invalid ());
65  }
66  // Actually do the work.
67  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
68  }
69 
70 
71  template<class LO, class GO, class NT>
74  numProcs_ (map.getComm ()->getSize ())
75  {}
76 
77 
78  template<class LO, class GO, class NT>
79  bool
81  isOneToOne (const Teuchos::Comm<int>& /* comm */) const
82  {
83  // A locally replicated Map is one-to-one only if there is no
84  // replication, that is, only if the Map's communicator only has
85  // one process.
86  return (numProcs_ == 1);
87  }
88 
89 
90  template<class LO, class GO, class NT>
91  std::string
93  {
94  std::ostringstream os;
95  os << "ReplicatedDirectory"
96  << "<" << Teuchos::TypeNameTraits<LO>::name ()
97  << ", " << Teuchos::TypeNameTraits<GO>::name ()
98  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
99  return os.str ();
100  }
101 
102 
103  template<class LO, class GO, class NT>
105  ContiguousUniformDirectory (const map_type& map)
106  {
107  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
108  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
109  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
110  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
111  }
112 
113 
114  template<class LO, class GO, class NT>
115  std::string
117  {
118  std::ostringstream os;
119  os << "ContiguousUniformDirectory"
120  << "<" << Teuchos::TypeNameTraits<LO>::name ()
121  << ", " << Teuchos::TypeNameTraits<GO>::name ()
122  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
123  return os.str ();
124  }
125 
126 
127  template<class LO, class GO, class NT>
131  const Teuchos::ArrayView<const GO> &globalIDs,
132  const Teuchos::ArrayView<int> &nodeIDs,
133  const Teuchos::ArrayView<LO> &localIDs,
134  const bool computeLIDs) const
135  {
136  using Teuchos::Comm;
137  using Teuchos::RCP;
138  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
139  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
141 
142  RCP<const Comm<int> > comm = map.getComm ();
143  const GO g_min = map.getMinAllGlobalIndex ();
144 
145  // Let N_G be the global number of elements in the Map,
146  // and P be the number of processes in its communicator.
147  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
148  //
149  // The first R processes own N_L+1 elements.
150  // The remaining P-R processes own N_L elements.
151  //
152  // Let g be the current GID, g_min be the global minimum GID,
153  // and g_0 = g - g_min. If g is a valid GID in this Map, then
154  // g_0 is in [0, N_G - 1].
155  //
156  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
157  // the rank of the process that owns g is floor(g_0 / (N_L +
158  // 1)), and its corresponding local index on that process is g_0
159  // mod (N_L + 1).
160  //
161  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
162  // and g_0 >= R*(N_L + 1), then the rank of the process that
163  // owns g is then R + floor(g_R / N_L), and its corresponding
164  // local index on that process is g_R mod N_L.
165 
166  const size_type N_G =
167  static_cast<size_type> (map.getGlobalNumElements ());
168  const size_type P = static_cast<size_type> (comm->getSize ());
169  const size_type N_L = N_G / P;
170  const size_type R = N_G - N_L * P; // N_G mod P
171  const size_type N_R = R * (N_L + static_cast<size_type> (1));
172 
173 #ifdef HAVE_TPETRA_DEBUG
174  TEUCHOS_TEST_FOR_EXCEPTION(
175  N_G != P*N_L + R, std::logic_error,
176  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
177  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
178  << " = " << P*N_L + R << ". "
179  "Please report this bug to the Tpetra developers.");
180 #endif // HAVE_TPETRA_DEBUG
181 
182  const size_type numGids = globalIDs.size (); // for const loop bound
183  // Avoid signed/unsigned comparisons below, in case GO is
184  // unsigned. (Integer literals are generally signed.)
185  const GO ONE = static_cast<GO> (1);
186 
187  if (computeLIDs) {
188  for (size_type k = 0; k < numGids; ++k) {
189  const GO g_0 = globalIDs[k] - g_min;
190 
191  // The first test is a little strange just in case GO is
192  // unsigned. Compilers raise a warning on tests like "x <
193  // 0" if x is unsigned, but don't usually raise a warning if
194  // the expression is a bit more complicated than that.
195  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
196  nodeIDs[k] = -1;
197  localIDs[k] = invalidLid;
198  res = IDNotPresent;
199  }
200  else if (g_0 < static_cast<GO> (N_R)) {
201  // The GID comes from the initial sequence of R processes.
202  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
203  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
204  }
205  else if (g_0 >= static_cast<GO> (N_R)) {
206  // The GID comes from the remaining P-R processes.
207  const GO g_R = g_0 - static_cast<GO> (N_R);
208  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
209  localIDs[k] = static_cast<int> (g_R % N_L);
210  }
211 #ifdef HAVE_TPETRA_DEBUG
212  else {
213  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
214  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
215  "should never get here. "
216  "Please report this bug to the Tpetra developers.");
217  }
218 #endif // HAVE_TPETRA_DEBUG
219  }
220  }
221  else { // don't compute local indices
222  for (size_type k = 0; k < numGids; ++k) {
223  const GO g_0 = globalIDs[k] - g_min;
224  // The first test is a little strange just in case GO is
225  // unsigned. Compilers raise a warning on tests like "x <
226  // 0" if x is unsigned, but don't usually raise a warning if
227  // the expression is a bit more complicated than that.
228  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
229  nodeIDs[k] = -1;
230  res = IDNotPresent;
231  }
232  else if (g_0 < static_cast<GO> (N_R)) {
233  // The GID comes from the initial sequence of R processes.
234  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
235  }
236  else if (g_0 >= static_cast<GO> (N_R)) {
237  // The GID comes from the remaining P-R processes.
238  const GO g_R = g_0 - static_cast<GO> (N_R);
239  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
240  }
241 #ifdef HAVE_TPETRA_DEBUG
242  else {
243  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
244  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
245  "should never get here. "
246  "Please report this bug to the Tpetra developers.");
247  }
248 #endif // HAVE_TPETRA_DEBUG
249  }
250  }
251  return res;
252  }
253 
254  template<class LO, class GO, class NT>
256  DistributedContiguousDirectory (const map_type& map)
257  {
258  using Teuchos::arcp;
259  using Teuchos::gatherAll;
260  using Teuchos::RCP;
261 
262  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
263 
264  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
265  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
266  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
267  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
268 
269  const int numProcs = comm->getSize ();
270 
271  // Make room for the min global ID on each process, plus one
272  // entry at the end for the "max cap."
273  allMinGIDs_ = arcp<GO> (numProcs + 1);
274  // Get my process' min global ID.
275  GO minMyGID = map.getMinGlobalIndex ();
276  // Gather all of the min global IDs into the first numProcs
277  // entries of allMinGIDs_.
278 
279  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
280  //
281  // The purpose of this giant hack is that gatherAll appears to
282  // interpret the "receive count" argument differently than
283  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
284  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
285  // which could relate either to this, or to OpenMPI.
286 #ifdef HAVE_TPETRACORE_MPI
287  MPI_Datatype rawMpiType = MPI_INT;
288  bool useRawMpi = true;
289  if (typeid (GO) == typeid (int)) {
290  rawMpiType = MPI_INT;
291  } else if (typeid (GO) == typeid (long)) {
292  rawMpiType = MPI_LONG;
293  } else {
294  useRawMpi = false;
295  }
296  if (useRawMpi) {
297  using Teuchos::rcp_dynamic_cast;
298  using Teuchos::MpiComm;
299  RCP<const MpiComm<int> > mpiComm =
300  rcp_dynamic_cast<const MpiComm<int> > (comm);
301  // It could be a SerialComm instead, even in an MPI build, so
302  // be sure to check.
303  if (! comm.is_null ()) {
304  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
305  const int err =
306  MPI_Allgather (&minMyGID, 1, rawMpiType,
307  allMinGIDs_.getRawPtr (), 1, rawMpiType,
308  rawMpiComm);
309  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
310  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
311  } else {
312  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
313  }
314  } else {
315  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
316  }
317 #else // NOT HAVE_TPETRACORE_MPI
318  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
319 #endif // HAVE_TPETRACORE_MPI
320  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
321 
322  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
323 
324  // Put the max cap at the end. Adding one lets us write loops
325  // over the global IDs with the usual strict less-than bound.
326  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
327  + Teuchos::OrdinalTraits<GO>::one ();
328  }
329 
330  template<class LO, class GO, class NT>
331  std::string
333  {
334  std::ostringstream os;
335  os << "DistributedContiguousDirectory"
336  << "<" << Teuchos::TypeNameTraits<LO>::name ()
337  << ", " << Teuchos::TypeNameTraits<GO>::name ()
338  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
339  return os.str ();
340  }
341 
342  template<class LO, class GO, class NT>
346  const Teuchos::ArrayView<const GO> &globalIDs,
347  const Teuchos::ArrayView<int> &nodeIDs,
348  const Teuchos::ArrayView<LO> &localIDs,
349  const bool computeLIDs) const
350  {
351  using Teuchos::Array;
352  using Teuchos::ArrayRCP;
353  using Teuchos::ArrayView;
354  using Teuchos::as;
355  using Teuchos::Comm;
356  using Teuchos::RCP;
357 
359  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
360  const int myRank = comm->getRank ();
361 
362  // Map is on one process or is locally replicated.
363  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
364  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
365  typename ArrayView<const GO>::iterator gidIter;
366  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
367  if (map.isNodeGlobalElement (*gidIter)) {
368  *procIter++ = myRank;
369  if (computeLIDs) {
370  *lidIter++ = map.getLocalElement (*gidIter);
371  }
372  }
373  else {
374  // Advance the pointers, leaving these values set to invalid
375  procIter++;
376  if (computeLIDs) {
377  lidIter++;
378  }
379  res = IDNotPresent;
380  }
381  }
382  return res;
383  }
384 
385  template<class LO, class GO, class NT>
389  const Teuchos::ArrayView<const GO> &globalIDs,
390  const Teuchos::ArrayView<int> &nodeIDs,
391  const Teuchos::ArrayView<LO> &localIDs,
392  const bool computeLIDs) const
393  {
394  using Teuchos::Array;
395  using Teuchos::ArrayRCP;
396  using Teuchos::ArrayView;
397  using Teuchos::as;
398  using Teuchos::Comm;
399  using Teuchos::RCP;
400 
401  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
402  const int numProcs = comm->getSize ();
403  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
404  const GO noGIDsOnProc = std::numeric_limits<GO>::max();
406 
407  // Find the first initialized GID for search below
408  int firstProcWithGIDs;
409  for (firstProcWithGIDs = 0; firstProcWithGIDs < numProcs;
410  firstProcWithGIDs++) {
411  if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
412  }
413 
414  // If Map is empty, return invalid values for all requested lookups
415  // This case should not happen, as an empty Map is not considered
416  // Distributed.
417  if (firstProcWithGIDs == numProcs) {
418  // No entries in Map
419  res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
420  std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
421  if (computeLIDs)
422  std::fill(localIDs.begin(), localIDs.end(), LINVALID);
423  return res;
424  }
425 
426  const GO one = as<GO> (1);
427  const GO nOverP = as<GO> (map.getGlobalNumElements ()
428  / as<global_size_t>(numProcs - firstProcWithGIDs));
429 
430  // Map is distributed but contiguous.
431  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
432  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
433  typename ArrayView<const GO>::iterator gidIter;
434  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
435  LO LID = LINVALID; // Assume not found until proven otherwise
436  int image = -1;
437  GO GID = *gidIter;
438  // Guess uniform distribution (TODO: maybe replace by a binary search)
439  // We go through all this trouble to avoid overflow and
440  // signed / unsigned casting mistakes (that were made in
441  // previous versions of this code).
442  int curRank;
443  const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
444  curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
445 
446  // This while loop will stop because
447  // allMinGIDs_[np] == global num elements
448  while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
449 
450  bool badGID = false;
451  while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
452  curRank--;
453  while (curRank >= firstProcWithGIDs &&
454  allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
455  }
456  if (curRank < firstProcWithGIDs) {
457  // GID is lower than all GIDs in map
458  badGID = true;
459  }
460  else if (curRank == numProcs) {
461  // GID is higher than all GIDs in map
462  badGID = true;
463  }
464  else {
465  // we have the lower bound; now limit from above
466  int above = curRank + 1;
467  while (allMinGIDs_[above] == noGIDsOnProc) above++;
468 
469  while (GID >= allMinGIDs_[above]) {
470  curRank = above;
471  if (curRank == numProcs) {
472  // GID is higher than all GIDs in map
473  badGID = true;
474  break;
475  }
476  above++;
477  while (allMinGIDs_[above] == noGIDsOnProc) above++;
478  }
479  }
480 
481  if (!badGID) {
482  image = curRank;
483  LID = as<LO> (GID - allMinGIDs_[image]);
484  }
485  else {
486  res = IDNotPresent;
487  }
488  *procIter++ = image;
489  if (computeLIDs) {
490  *lidIter++ = LID;
491  }
492  }
493  return res;
494  }
495 
496  template<class LO, class GO, class NT>
498  DistributedNoncontiguousDirectory (const map_type& map) :
499  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
500  locallyOneToOne_ (true), // to be revised below
501  useHashTables_ (false) // to be revised below
502  {
503  initialize (map, Teuchos::null);
504  }
505 
506  template<class LO, class GO, class NT>
507  DistributedNoncontiguousDirectory<LO, GO, NT>::
508  DistributedNoncontiguousDirectory (const map_type& map,
509  const tie_break_type& tie_break) :
510  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
511  locallyOneToOne_ (true), // to be revised below
512  useHashTables_ (false) // to be revised below
513  {
514  initialize (map, Teuchos::ptrFromRef (tie_break));
515  }
516 
517  template<class LO, class GO, class NT>
518  void
519  DistributedNoncontiguousDirectory<LO, GO, NT>::
520  initialize (const map_type& map,
521  Teuchos::Ptr<const tie_break_type> tie_break)
522  {
523  using Teuchos::arcp;
524  using Teuchos::Array;
525  using Teuchos::ArrayRCP;
526  using Teuchos::ArrayView;
527  using Teuchos::as;
528  using Teuchos::RCP;
529  using Teuchos::rcp;
530  using Teuchos::typeName;
531  using Teuchos::TypeNameTraits;
532  using std::cerr;
533  using std::endl;
534  typedef Array<int>::size_type size_type;
535 
536  // This class' implementation of getEntriesImpl() currently
537  // encodes the following assumptions:
538  //
539  // 1. global_size_t >= GO
540  // 2. global_size_t >= int
541  // 3. global_size_t >= LO
542  //
543  // We check these assumptions here.
544  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
545  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
546  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
547  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
548  << ".");
549  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
550  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
551  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
552  << sizeof(int) << ".");
553  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
554  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
555  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
556  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
557  << ".");
558 
559  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
560  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
561  const GO minAllGID = map.getMinAllGlobalIndex ();
562  const GO maxAllGID = map.getMaxAllGlobalIndex ();
563 
564  // The "Directory Map" (see below) will have a range of elements
565  // from the minimum to the maximum GID of the user Map, and a
566  // minimum GID of minAllGID from the user Map. It doesn't
567  // actually have to store all those entries, though do beware of
568  // calling getLocalElementList on it (see Bug 5822).
569  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
570 
571  // We can't afford to replicate the whole directory on each
572  // process, so create the "Directory Map", a uniform contiguous
573  // Map that describes how we will distribute the directory over
574  // processes.
575  //
576  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
577  // the index base. The index base should be separate from the
578  // minimum GID.
579  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
580  GloballyDistributed));
581  // The number of Directory elements that my process owns.
582  const size_t dir_numMyEntries = directoryMap_->getLocalNumElements ();
583 
584  // Fix for Bug 5822: If the input Map is "sparse," that is if
585  // the difference between the global min and global max GID is
586  // much larger than the global number of elements in the input
587  // Map, then it's possible that the Directory Map might have
588  // many more entries than the input Map on this process. This
589  // can cause memory scalability issues. In that case, we switch
590  // from the array-based implementation of Directory storage to
591  // the hash table - based implementation. We don't use hash
592  // tables all the time, because they are slower in the common
593  // case of a nonsparse Map.
594  //
595  // NOTE: This is a per-process decision. Some processes may use
596  // array-based storage, whereas others may use hash table -
597  // based storage.
598 
599  // A hash table takes a constant factor more space, more or
600  // less, than an array. Thus, it's not worthwhile, even in
601  // terms of memory usage, always to use a hash table.
602  // Furthermore, array lookups are faster than hash table
603  // lookups, so it may be worthwhile to use an array even if it
604  // takes more space. The "sparsity threshold" governs when to
605  // switch to a hash table - based implementation.
606  const size_t inverseSparsityThreshold = 10;
607  useHashTables_ =
608  (dir_numMyEntries >= inverseSparsityThreshold * map.getLocalNumElements());
609 
610  // Get list of process IDs that own the directory entries for the
611  // Map GIDs. These will be the targets of the sends that the
612  // Distributor will do.
613  const int myRank = comm->getRank ();
614  const size_t numMyEntries = map.getLocalNumElements ();
615  Array<int> sendImageIDs (numMyEntries);
616  ArrayView<const GO> myGlobalEntries = map.getLocalElementList ();
617  // An ID not present in this lookup indicates that it lies outside
618  // of the range [minAllGID,maxAllGID] (from map_). this means
619  // something is wrong with map_, our fault.
620  const LookupStatus lookupStatus =
621  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
622  TEUCHOS_TEST_FOR_EXCEPTION(
623  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
624  << " constructor: the Directory Map could not find out where one or "
625  "more of my Map's indices should go. The input to getRemoteIndexList "
626  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
627  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
628  "the following entries on the calling process " <<
629  map.getComm ()->getRank () << ": " <<
630  Teuchos::toString (map.getLocalElementList ()) << ", and has "
631  << map.getGlobalNumElements () << " total global indices in ["
632  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
633  << "]. The Directory Map has "
634  << directoryMap_->getGlobalNumElements () << " total global indices in "
635  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
636  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
637  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
638  directoryMap_->getMaxGlobalIndex () << "]. "
639  "This probably means there is a bug in Map or Directory. "
640  "Please report this bug to the Tpetra developers.");
641 
642  // Initialize the distributor using the list of process IDs to
643  // which to send. We'll use the distributor to send out triples
644  // of (GID, process ID, LID). We're sending the entries to the
645  // processes that the Directory Map says should own them, which is
646  // why we called directoryMap_->getRemoteIndexList() above.
647  Distributor distor (comm);
648  const size_t numReceives = distor.createFromSends (sendImageIDs);
649 
650  // NOTE (mfh 21 Mar 2012) The following code assumes that
651  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
652  //
653  // Create and fill buffer of (GID, PID, LID) triples to send
654  // out. We pack the (GID, PID, LID) triples into a single Array
655  // of GO, casting the PID from int to GO and the LID from LO to
656  // GO as we do so.
657  //
658  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
659  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
660  // required, and the latter is generally the case, but we should
661  // still check for this.
662  const int packetSize = 3; // We're sending triples, so packet size is 3.
663  Kokkos::View<GO*, Kokkos::HostSpace> exportEntries("exportEntries", packetSize * numMyEntries);
664  {
665  size_t exportIndex = 0;
666  for (size_t i = 0; i < numMyEntries; ++i) {
667  exportEntries[exportIndex++] = myGlobalEntries[i];
668  exportEntries[exportIndex++] = as<GO> (myRank);
669  exportEntries[exportIndex++] = as<GO> (i);
670  }
671  }
672  // Buffer of data to receive. The Distributor figured out for
673  // us how many packets we're receiving, when we called its
674  // createFromSends() method to set up the distribution plan.
675  Kokkos::View<GO*, Kokkos::HostSpace> importElements("importElements", packetSize * distor.getTotalReceiveLength());
676 
677  // Distribute the triples of (GID, process ID, LID).
678  distor.doPostsAndWaits(exportEntries, packetSize, importElements);
679 
680  // Unpack the redistributed data. Both implementations of
681  // Directory storage map from an LID in the Directory Map (which
682  // is the LID of the GID to store) to either a PID or an LID in
683  // the input Map. Each "packet" (contiguous chunk of
684  // importElements) contains a triple: (GID, PID, LID).
685  if (useHashTables_) {
686  // Create the hash tables. We know exactly how many elements
687  // to expect in each hash table. FixedHashTable's constructor
688  // currently requires all the keys and values at once, so we
689  // have to extract them in temporary arrays. It may be
690  // possible to rewrite FixedHashTable to use a "start fill" /
691  // "end fill" approach that avoids the temporary arrays, but
692  // we won't try that for now.
693 
694  // The constructors of Array and ArrayRCP that take a number
695  // of elements all initialize the arrays. Instead, allocate
696  // raw arrays, then hand them off to ArrayRCP, to avoid the
697  // initial unnecessary initialization without losing the
698  // benefit of exception safety (and bounds checking, in a
699  // debug build).
700  LO* tableKeysRaw = NULL;
701  LO* tableLidsRaw = NULL;
702  int* tablePidsRaw = NULL;
703  try {
704  tableKeysRaw = new LO [numReceives];
705  tableLidsRaw = new LO [numReceives];
706  tablePidsRaw = new int [numReceives];
707  } catch (...) {
708  if (tableKeysRaw != NULL) {
709  delete [] tableKeysRaw;
710  }
711  if (tableLidsRaw != NULL) {
712  delete [] tableLidsRaw;
713  }
714  if (tablePidsRaw != NULL) {
715  delete [] tablePidsRaw;
716  }
717  throw;
718  }
719  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
720  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
721  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
722 
723  if (tie_break.is_null ()) {
724  // Fill the temporary arrays of keys and values.
725  size_type importIndex = 0;
726  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
727  const GO curGID = importElements[importIndex++];
728  const LO curLID = directoryMap_->getLocalElement (curGID);
729  TEUCHOS_TEST_FOR_EXCEPTION(
730  curLID == LINVALID, std::logic_error,
731  Teuchos::typeName(*this) << " constructor: Incoming global index "
732  << curGID << " does not have a corresponding local index in the "
733  "Directory Map. Please report this bug to the Tpetra developers.");
734  tableKeys[i] = curLID;
735  tablePids[i] = importElements[importIndex++];
736  tableLids[i] = importElements[importIndex++];
737  }
738  // Set up the hash tables. The hash tables' constructor
739  // detects whether there are duplicates, so that we can set
740  // locallyOneToOne_.
741  lidToPidTable_ =
742  rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
743  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
744  lidToLidTable_ =
745  rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
746  }
747  else { // tie_break is NOT null
748 
749  // For each directory Map LID received, collect all the
750  // corresponding (PID,LID) pairs. If the input Map is not
751  // one-to-one, corresponding directory Map LIDs will have
752  // more than one pair. In that case, we will use the
753  // TieBreak object to pick exactly one pair.
754  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
755  pair_table_type ownedPidLidPairs;
756 
757  // For each directory Map LID received, collect the zero or
758  // more input Map (PID,LID) pairs into ownedPidLidPairs.
759  size_type importIndex = 0;
760  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
761  const GO curGID = importElements[importIndex++];
762  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
763  TEUCHOS_TEST_FOR_EXCEPTION(
764  dirMapLid == LINVALID, std::logic_error,
765  Teuchos::typeName(*this) << " constructor: Incoming global index "
766  << curGID << " does not have a corresponding local index in the "
767  "Directory Map. Please report this bug to the Tpetra developers.");
768  tableKeys[i] = dirMapLid;
769  const int PID = importElements[importIndex++];
770  const int LID = importElements[importIndex++];
771 
772  // These may change below. We fill them in just to ensure
773  // that they won't have invalid values.
774  tablePids[i] = PID;
775  tableLids[i] = LID;
776 
777  // For every directory Map LID, we have to remember all
778  // (PID, LID) pairs. The TieBreak object will arbitrate
779  // between them in the loop below.
780  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
781  }
782 
783  // Use TieBreak to arbitrate between (PID,LID) pairs
784  // corresponding to each directory Map LID.
785  //
786  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
787  // as the directory Map LID?
788  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
789  // contiguous, but the directory map is. Need to set tablePids[i]
790  // and tableLids[i], so need to loop over numReceives (as that is
791  // how those arrays are allocated). FIXED
792 
793  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
794  const LO dirMapLid = tableKeys[i];
795  const std::vector<std::pair<int, LO> >& pidLidList =
796  ownedPidLidPairs[dirMapLid];
797  const size_t listLen = pidLidList.size();
798  if (listLen == 0) continue; // KDD This will never happen
799  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
800  if (listLen > 1) {
801  locallyOneToOne_ = false;
802  }
803  // If there is some (PID,LID) pair for the current input
804  // Map LID, then it makes sense to invoke the TieBreak
805  // object to arbitrate between the options. Even if
806  // there is only one (PID,LID) pair, we still want to
807  // give the TieBreak object a chance to do whatever it
808  // likes to do, in terms of side effects (e.g., track
809  // (PID,LID) pairs).
810  const size_type index =
811  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
812  pidLidList));
813  tablePids[i] = pidLidList[index].first;
814  tableLids[i] = pidLidList[index].second;
815  }
816 
817  // Set up the hash tables.
818  lidToPidTable_ =
819  rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
820  lidToLidTable_ =
821  rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
822  }
823  }
824  else {
825  if (tie_break.is_null ()) {
826  // Use array-based implementation of Directory storage.
827  // Allocate these arrays and fill them with invalid values,
828  // in case the input Map's GID list is sparse (i.e., does
829  // not populate all GIDs from minAllGID to maxAllGID).
830  PIDs_ = arcp<int> (dir_numMyEntries);
831  std::fill (PIDs_.begin (), PIDs_.end (), -1);
832  LIDs_ = arcp<LO> (dir_numMyEntries);
833  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
834  // Fill in the arrays with PIDs resp. LIDs.
835  size_type importIndex = 0;
836  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
837  const GO curGID = importElements[importIndex++];
838  const LO curLID = directoryMap_->getLocalElement (curGID);
839  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
840  Teuchos::typeName(*this) << " constructor: Incoming global index "
841  << curGID << " does not have a corresponding local index in the "
842  "Directory Map. Please report this bug to the Tpetra developers.");
843 
844  // If PIDs_[curLID] is not -1, then curGID is a duplicate
845  // on the calling process, so the Directory is not locally
846  // one-to-one.
847  if (PIDs_[curLID] != -1) {
848  locallyOneToOne_ = false;
849  }
850  PIDs_[curLID] = importElements[importIndex++];
851  LIDs_[curLID] = importElements[importIndex++];
852  }
853  }
854  else {
855  PIDs_ = arcp<int> (dir_numMyEntries);
856  LIDs_ = arcp<LO> (dir_numMyEntries);
857  std::fill (PIDs_.begin (), PIDs_.end (), -1);
858 
859  // All received (PID, LID) pairs go into ownedPidLidPairs.
860  // This is a map from the directory Map's LID to the (PID,
861  // LID) pair (where the latter LID comes from the input Map,
862  // not the directory Map). If the input Map is not
863  // one-to-one, corresponding LIDs will have
864  // ownedPidLidPairs[curLID].size() > 1. In that case, we
865  // will use the TieBreak object to pick exactly one pair.
866  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
867  size_t importIndex = 0;
868  for (size_t i = 0; i < numReceives; ++i) {
869  const GO GID = importElements[importIndex++];
870  const int PID = importElements[importIndex++];
871  const LO LID = importElements[importIndex++];
872 
873  const LO dirMapLid = directoryMap_->getLocalElement (GID);
874  TEUCHOS_TEST_FOR_EXCEPTION(
875  dirMapLid == LINVALID, std::logic_error,
876  Teuchos::typeName(*this) << " constructor: Incoming global index "
877  << GID << " does not have a corresponding local index in the "
878  "Directory Map. Please report this bug to the Tpetra developers.");
879  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
880  }
881 
882  // Use TieBreak to arbitrate between (PID,LID) pairs
883  // corresponding to each directory Map LID.
884  //
885  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
886  // as the directory Map LID?
887  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
888  // contiguous. Loop over all ownedPidLidPairs; skip those that have
889  // empty lists. FIXED
890 
891  for (size_t i = 0; i < dir_numMyEntries; ++i) {
892  const std::vector<std::pair<int, LO> >& pidLidList =
893  ownedPidLidPairs[i];
894  const size_t listLen = pidLidList.size();
895  if (listLen == 0) continue; // KDD will happen for GIDs not in
896  // KDD the user's source map
897  const LO dirMapLid = static_cast<LO> (i);
898  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
899  if (listLen > 1) {
900  locallyOneToOne_ = false;
901  }
902  // If there is some (PID,LID) pair for the current input
903  // Map LID, then it makes sense to invoke the TieBreak
904  // object to arbitrate between the options. Even if
905  // there is only one (PID,LID) pair, we still want to
906  // give the TieBreak object a chance to do whatever it
907  // likes to do, in terms of side effects (e.g., track
908  // (PID,LID) pairs).
909  const size_type index =
910  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
911  pidLidList));
912  PIDs_[i] = pidLidList[index].first;
913  LIDs_[i] = pidLidList[index].second;
914  }
915  }
916  }
917  }
918 
919  template<class LO, class GO, class NT>
920  std::string
922  {
923  std::ostringstream os;
924  os << "DistributedNoncontiguousDirectory"
925  << "<" << Teuchos::TypeNameTraits<LO>::name ()
926  << ", " << Teuchos::TypeNameTraits<GO>::name ()
927  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
928  return os.str ();
929  }
930 
931  template<class LO, class GO, class NT>
934  getEntriesImpl (const map_type& map,
935  const Teuchos::ArrayView<const GO> &globalIDs,
936  const Teuchos::ArrayView<int> &nodeIDs,
937  const Teuchos::ArrayView<LO> &localIDs,
938  const bool computeLIDs) const
939  {
940  using Teuchos::Array;
941  using Teuchos::ArrayRCP;
942  using Teuchos::ArrayView;
943  using Teuchos::as;
944  using Teuchos::RCP;
945  using Teuchos::toString;
946  using Details::Behavior;
948  using std::cerr;
949  using std::endl;
950  using size_type = typename Array<GO>::size_type;
951  const char funcPrefix[] = "Tpetra::"
952  "DistributedNoncontiguousDirectory::getEntriesImpl: ";
953  const char errSuffix[] =
954  " Please report this bug to the Tpetra developers.";
955 
956  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
957  const bool verbose = Behavior::verbose ("Directory") ||
958  Behavior::verbose ("Tpetra::Directory");
959  const size_t maxNumToPrint = verbose ?
961 
962  std::unique_ptr<std::string> procPrefix;
963  if (verbose) {
964  std::ostringstream os;
965  os << "Proc ";
966  if (map.getComm ().is_null ()) {
967  os << "?";
968  }
969  else {
970  os << map.getComm ()->getRank ();
971  }
972  os << ": ";
973  procPrefix = std::unique_ptr<std::string>(
974  new std::string(os.str()));
975  os << funcPrefix << "{";
976  verbosePrintArray(os, globalIDs, "GIDs", maxNumToPrint);
977  os << ", ";
978  verbosePrintArray(os, nodeIDs, "PIDs", maxNumToPrint);
979  os << ", ";
980  verbosePrintArray(os, localIDs, "LIDs", maxNumToPrint);
981  os << ", computeLIDs: "
982  << (computeLIDs ? "true" : "false") << "}" << endl;
983  cerr << os.str ();
984  }
985 
986  const size_t numEntries = globalIDs.size ();
987  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
989 
990  //
991  // Set up directory structure.
992  //
993 
994  // If we're computing LIDs, we also have to include them in each
995  // packet, along with the GID and process ID.
996  const int packetSize = computeLIDs ? 3 : 2;
997 
998  // For data distribution, we use: Surprise! A Distributor!
999  Distributor distor (comm);
1000 
1001  // Get directory locations for the requested list of entries.
1002  Array<int> dirImages (numEntries);
1003  if (verbose) {
1004  std::ostringstream os;
1005  os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
1006  << endl;
1007  cerr << os.str ();
1008  }
1009  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1010  if (verbose) {
1011  std::ostringstream os;
1012  os << *procPrefix << "Director Map getRemoteIndexList out ";
1013  verbosePrintArray(os, dirImages, "PIDs", maxNumToPrint);
1014  os << endl;
1015  cerr << os.str ();
1016  }
1017 
1018  // Check for unfound globalIDs and set corresponding nodeIDs to -1
1019  size_t numMissing = 0;
1020  if (res == IDNotPresent) {
1021  for (size_t i=0; i < numEntries; ++i) {
1022  if (dirImages[i] == -1) {
1023  nodeIDs[i] = -1;
1024  if (computeLIDs) {
1025  localIDs[i] = LINVALID;
1026  }
1027  numMissing++;
1028  }
1029  }
1030  }
1031 
1032  Array<GO> sendGIDs;
1033  Array<int> sendImages;
1034  if (verbose) {
1035  std::ostringstream os;
1036  os << *procPrefix << "Call Distributor::createFromRecvs"
1037  << endl;
1038  cerr << os.str ();
1039  }
1040  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1041  if (verbose) {
1042  std::ostringstream os;
1043  os << *procPrefix << "Distributor::createFromRecvs result: "
1044  << "{";
1045  verbosePrintArray(os, sendGIDs, "sendGIDs", maxNumToPrint);
1046  os << ", ";
1047  verbosePrintArray(os, sendImages, "sendPIDs", maxNumToPrint);
1048  os << "}" << endl;
1049  cerr << os.str ();
1050  }
1051  const size_type numSends = sendGIDs.size ();
1052 
1053  //
1054  // mfh 13 Nov 2012:
1055  //
1056  // The code below temporarily stores LO, GO, and int values in
1057  // an array of global_size_t. If one of the signed types (LO
1058  // and GO should both be signed) happened to be -1 (or some
1059  // negative number, but -1 is the one that came up today), then
1060  // conversion to global_size_t will result in a huge
1061  // global_size_t value, and thus conversion back may overflow.
1062  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1063  // all along.)
1064  //
1065  // The overflow normally would not be a problem, since it would
1066  // just go back to -1 again. However, Teuchos::as does range
1067  // checking on conversions in a debug build, so it throws an
1068  // exception (std::range_error) in this case. Range checking is
1069  // generally useful in debug mode, so we don't want to disable
1070  // this behavior globally.
1071  //
1072  // We solve this problem by forgoing use of Teuchos::as for the
1073  // conversions below from LO, GO, or int to global_size_t, and
1074  // the later conversions back from global_size_t to LO, GO, or
1075  // int.
1076  //
1077  // I've recorded this discussion as Bug 5760.
1078  //
1079 
1080  // global_size_t >= GO
1081  // global_size_t >= size_t >= int
1082  // global_size_t >= size_t >= LO
1083  // Therefore, we can safely store all of these in a global_size_t
1084  Kokkos::View<global_size_t*, Kokkos::HostSpace> exports("exports", packetSize * numSends);
1085  {
1086  // Packet format:
1087  // - If computing LIDs: (GID, PID, LID)
1088  // - Otherwise: (GID, PID)
1089  //
1090  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1091 
1092  // Current position to which to write in exports array. If
1093  // sending pairs, we pack the (GID, PID) pair for gid =
1094  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1095  // triples, we pack the (GID, PID, LID) pair for gid =
1096  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1097  size_t exportsIndex = 0;
1098 
1099  if (useHashTables_) {
1100  if (verbose) {
1101  std::ostringstream os;
1102  os << *procPrefix << "Pack exports (useHashTables_ true)"
1103  << endl;
1104  cerr << os.str ();
1105  }
1106  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1107  const GO curGID = sendGIDs[gidIndex];
1108  // Don't use as() here (see above note).
1109  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1110  const LO curLID = directoryMap_->getLocalElement (curGID);
1111  TEUCHOS_TEST_FOR_EXCEPTION
1112  (curLID == LINVALID, std::logic_error, funcPrefix <<
1113  "Directory Map's global index " << curGID << " lacks "
1114  "a corresponding local index." << errSuffix);
1115  // Don't use as() here (see above note).
1116  exports[exportsIndex++] =
1117  static_cast<global_size_t> (lidToPidTable_->get (curLID));
1118  if (computeLIDs) {
1119  // Don't use as() here (see above note).
1120  exports[exportsIndex++] =
1121  static_cast<global_size_t> (lidToLidTable_->get (curLID));
1122  }
1123  }
1124  }
1125  else {
1126  if (verbose) {
1127  std::ostringstream os;
1128  os << *procPrefix << "Pack exports (useHashTables_ false)"
1129  << endl;
1130  cerr << os.str ();
1131  }
1132  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1133  const GO curGID = sendGIDs[gidIndex];
1134  // Don't use as() here (see above note).
1135  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1136  const LO curLID = directoryMap_->getLocalElement (curGID);
1137  TEUCHOS_TEST_FOR_EXCEPTION
1138  (curLID == LINVALID, std::logic_error, funcPrefix <<
1139  "Directory Map's global index " << curGID << " lacks "
1140  "a corresponding local index." << errSuffix);
1141  // Don't use as() here (see above note).
1142  exports[exportsIndex++] =
1143  static_cast<global_size_t> (PIDs_[curLID]);
1144  if (computeLIDs) {
1145  // Don't use as() here (see above note).
1146  exports[exportsIndex++] =
1147  static_cast<global_size_t> (LIDs_[curLID]);
1148  }
1149  }
1150  }
1151 
1152  TEUCHOS_TEST_FOR_EXCEPTION
1153  (exportsIndex > exports.size(), std::logic_error,
1154  funcPrefix << "On Process " << comm->getRank () << ", "
1155  "exportsIndex = " << exportsIndex << " > exports.size() = "
1156  << exports.size () << "." << errSuffix);
1157  }
1158 
1159  TEUCHOS_TEST_FOR_EXCEPTION
1160  (numEntries < numMissing, std::logic_error, funcPrefix <<
1161  "On Process " << comm->getRank () << ", numEntries = " <<
1162  numEntries << " < numMissing = " << numMissing << "." <<
1163  errSuffix);
1164 
1165  //
1166  // mfh 13 Nov 2012: See note above on conversions between
1167  // global_size_t and LO, GO, or int.
1168  //
1169  const size_t numRecv = numEntries - numMissing;
1170 
1171  {
1172  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1173  const size_t requiredImportLen = numRecv * packetSize;
1174  const int myRank = comm->getRank ();
1175  TEUCHOS_TEST_FOR_EXCEPTION(
1176  importLen < requiredImportLen, std::logic_error,
1177  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1178  "On Process " << myRank << ": The 'imports' array must have length "
1179  "at least " << requiredImportLen << ", but its actual length is " <<
1180  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1181  packetSize << ", numEntries (# GIDs): " << numEntries <<
1182  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1183  << distor.getTotalReceiveLength () << ". " << std::endl <<
1184  "Distributor description: " << distor.description () << ". "
1185  << std::endl <<
1186  "Please report this bug to the Tpetra developers.");
1187  }
1188 
1189  Kokkos::View<global_size_t*, Kokkos::HostSpace> imports("imports", packetSize * distor.getTotalReceiveLength());
1190  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1191  // with communication, by splitting this call into doPosts and
1192  // doWaits. The code is still correct in this form, however.
1193  if (verbose) {
1194  std::ostringstream os;
1195  os << *procPrefix << "Call doPostsAndWaits: {"
1196  << "packetSize: " << packetSize << ", ";
1197  verbosePrintArray(os, exports, "exports", maxNumToPrint);
1198  os << "}" << endl;
1199  cerr << os.str ();
1200  }
1201  distor.doPostsAndWaits(exports, packetSize, imports);
1202  if (verbose) {
1203  std::ostringstream os;
1204  os << *procPrefix << "doPostsAndWaits result: ";
1205  verbosePrintArray(os, imports, "imports", maxNumToPrint);
1206  os << endl;
1207  cerr << os.str ();
1208  }
1209 
1210  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1211  Array<GO> offset (numEntries); // permutation array (sort2 output)
1212  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1213  offset[ii] = ii;
1214  }
1215  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1216  if (verbose) {
1217  std::ostringstream os;
1218  os << *procPrefix;
1219  verbosePrintArray(os, sortedIDs, "sortedIDs", maxNumToPrint);
1220  os << ", ";
1221  verbosePrintArray(os, offset, "offset", maxNumToPrint);
1222  os << endl;
1223  cerr << os.str ();
1224  }
1225 
1226  size_t importsIndex = 0;
1227 
1228  // we know these conversions are in range, because we loaded this data
1229  //
1230  // Don't use as() for conversions here; we know they are in range.
1231  for (size_t i = 0; i < numRecv; ++i) {
1232  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1233  auto p1 = std::equal_range (sortedIDs.begin(),
1234  sortedIDs.end(), curGID);
1235  if (p1.first != p1.second) {
1236  const size_t j = p1.first - sortedIDs.begin();
1237  nodeIDs[offset[j]] =
1238  static_cast<int> (imports[importsIndex++]);
1239  if (computeLIDs) {
1240  localIDs[offset[j]] =
1241  static_cast<LO> (imports[importsIndex++]);
1242  }
1243  if (nodeIDs[offset[j]] == -1) {
1244  res = IDNotPresent;
1245  }
1246  }
1247  }
1248 
1249  TEUCHOS_TEST_FOR_EXCEPTION
1250  (size_t (importsIndex) > size_t (imports.size ()),
1251  std::logic_error, funcPrefix << "On Process " <<
1252  comm->getRank () << ": importsIndex = " << importsIndex
1253  << " > imports.size() = " << imports.size () << ". "
1254  "numRecv: " << numRecv
1255  << ", packetSize: " << packetSize << ", "
1256  "numEntries (# GIDs): " << numEntries
1257  << ", numMissing: " << numMissing
1258  << ": distor.getTotalReceiveLength(): "
1259  << distor.getTotalReceiveLength () << "." << errSuffix);
1260  if (verbose) {
1261  std::ostringstream os;
1262  os << *procPrefix << funcPrefix << "Done!" << endl;
1263  cerr << os.str ();
1264  }
1265  return res;
1266  }
1267 
1268 
1269  template<class LO, class GO, class NT>
1270  bool
1272  isOneToOne (const Teuchos::Comm<int>& comm) const
1273  {
1274  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1275  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1276  int gbl121 = 0;
1277  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1278  Teuchos::outArg (gbl121));
1279  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1280  }
1281  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1282  }
1283  } // namespace Details
1284 } // namespace Tpetra
1285 
1286 //
1287 // Explicit instantiation macro
1288 //
1289 // Must be expanded from within the Tpetra namespace!
1290 //
1291 #define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1292  namespace Details { \
1293  template class Directory< LO , GO , NODE >; \
1294  template class ReplicatedDirectory< LO , GO , NODE >; \
1295  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1296  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1297  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1298  }
1299 
1300 #endif // TPETRA_DIRECTORYIMPL_DEF_HPP
void initialize(int *argc, char ***argv)
Initialize Tpetra.
std::string description() const override
A one-line human-readable description of this object.
Interface for breaking ties in ownership.
std::string description() const
Return a one-line description of this object.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
Implementation of Directory for a distributed contiguous Map.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
A parallel distribution of indices over processes.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
std::enable_if<(Kokkos::is_view< ExpView >::value &&Kokkos::is_view< ImpView >::value)>::type doPostsAndWaits(const ExpView &exports, size_t numPackets, const ImpView &imports)
Execute the (forward) communication plan.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Stand-alone utility functions and macros.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Implementation of Directory for a contiguous, uniformly distributed Map.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
std::string description() const override
A one-line human-readable description of this object.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.