Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef __Tpetra_DirectoryImpl_def_hpp
43 #define __Tpetra_DirectoryImpl_def_hpp
44 
47 
49 #include <Tpetra_Distributor.hpp>
50 #include <Tpetra_Map.hpp>
51 #include <Tpetra_TieBreak.hpp>
52 
53 #include <Tpetra_Details_FixedHashTable.hpp>
54 #include <Tpetra_HashTable.hpp>
55 #include "Teuchos_Comm.hpp"
56 
57 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
58 #ifdef HAVE_TPETRACORE_MPI
59 # include <mpi.h>
60 #endif // HAVE_TPETRACORE_MPI
61 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
62 
63 namespace Tpetra {
64  namespace Details {
65  template<class LO, class GO, class NT>
68 
69  template<class LO, class GO, class NT>
72  getEntries (const map_type& map,
73  const Teuchos::ArrayView<const GO> &globalIDs,
74  const Teuchos::ArrayView<int> &nodeIDs,
75  const Teuchos::ArrayView<LO> &localIDs,
76  const bool computeLIDs) const
77  {
78  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
79  // all have the same size, before modifying any output arguments.
80  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
81  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
82  "Output arrays do not have the right sizes. nodeIDs.size() = "
83  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
84  << ".");
85  TEUCHOS_TEST_FOR_EXCEPTION(
86  computeLIDs && localIDs.size() != globalIDs.size(),
87  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
88  "Output array do not have the right sizes. localIDs.size() = "
89  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
90  << ".");
91 
92  // Initially, fill nodeIDs and localIDs (if applicable) with
93  // invalid values. The "invalid" process ID is -1 (this means
94  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
95  // "invalid" process ID); the invalid local ID comes from
96  // OrdinalTraits.
97  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
98  if (computeLIDs) {
99  std::fill (localIDs.begin(), localIDs.end(),
100  Teuchos::OrdinalTraits<LO>::invalid ());
101  }
102  // Actually do the work.
103  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
104  }
105 
106 
107  template<class LO, class GO, class NT>
110  numProcs_ (map.getComm ()->getSize ())
111  {}
112 
113 
114  template<class LO, class GO, class NT>
117  numProcs_ (0) // to be set later
118  {}
119 
120 
121  template<class LO, class GO, class NT>
122  bool
124  isOneToOne (const Teuchos::Comm<int>& /* comm */) const
125  {
126  // A locally replicated Map is one-to-one only if there is no
127  // replication, that is, only if the Map's communicator only has
128  // one process.
129  return (numProcs_ == 1);
130  }
131 
132 
133  template<class LO, class GO, class NT>
134  std::string
136  {
137  std::ostringstream os;
138  os << "ReplicatedDirectory"
139  << "<" << Teuchos::TypeNameTraits<LO>::name ()
140  << ", " << Teuchos::TypeNameTraits<GO>::name ()
141  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
142  return os.str ();
143  }
144 
145 
146  template<class LO, class GO, class NT>
149  {
150  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
151  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
152  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
153  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
154  }
155 
156 
157  template<class LO, class GO, class NT>
158  std::string
160  {
161  std::ostringstream os;
162  os << "ContiguousUniformDirectory"
163  << "<" << Teuchos::TypeNameTraits<LO>::name ()
164  << ", " << Teuchos::TypeNameTraits<GO>::name ()
165  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
166  return os.str ();
167  }
168 
169 
170  template<class LO, class GO, class NT>
174  const Teuchos::ArrayView<const GO> &globalIDs,
175  const Teuchos::ArrayView<int> &nodeIDs,
176  const Teuchos::ArrayView<LO> &localIDs,
177  const bool computeLIDs) const
178  {
179  using Teuchos::Comm;
180  using Teuchos::RCP;
181  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
182  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
184 
185  RCP<const Comm<int> > comm = map.getComm ();
186  const GO g_min = map.getMinAllGlobalIndex ();
187 
188  // Let N_G be the global number of elements in the Map,
189  // and P be the number of processes in its communicator.
190  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
191  //
192  // The first R processes own N_L+1 elements.
193  // The remaining P-R processes own N_L elements.
194  //
195  // Let g be the current GID, g_min be the global minimum GID,
196  // and g_0 = g - g_min. If g is a valid GID in this Map, then
197  // g_0 is in [0, N_G - 1].
198  //
199  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
200  // the rank of the process that owns g is floor(g_0 / (N_L +
201  // 1)), and its corresponding local index on that process is g_0
202  // mod (N_L + 1).
203  //
204  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
205  // and g_0 >= R*(N_L + 1), then the rank of the process that
206  // owns g is then R + floor(g_R / N_L), and its corresponding
207  // local index on that process is g_R mod N_L.
208 
209  const size_type N_G =
210  static_cast<size_type> (map.getGlobalNumElements ());
211  const size_type P = static_cast<size_type> (comm->getSize ());
212  const size_type N_L = N_G / P;
213  const size_type R = N_G - N_L * P; // N_G mod P
214  const size_type N_R = R * (N_L + static_cast<size_type> (1));
215 
216 #ifdef HAVE_TPETRA_DEBUG
217  TEUCHOS_TEST_FOR_EXCEPTION(
218  N_G != P*N_L + R, std::logic_error,
219  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
220  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
221  << " = " << P*N_L + R << ". "
222  "Please report this bug to the Tpetra developers.");
223 #endif // HAVE_TPETRA_DEBUG
224 
225  const size_type numGids = globalIDs.size (); // for const loop bound
226  // Avoid signed/unsigned comparisons below, in case GO is
227  // unsigned. (Integer literals are generally signed.)
228  const GO ONE = static_cast<GO> (1);
229 
230  if (computeLIDs) {
231  for (size_type k = 0; k < numGids; ++k) {
232  const GO g_0 = globalIDs[k] - g_min;
233 
234  // The first test is a little strange just in case GO is
235  // unsigned. Compilers raise a warning on tests like "x <
236  // 0" if x is unsigned, but don't usually raise a warning if
237  // the expression is a bit more complicated than that.
238  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
239  nodeIDs[k] = -1;
240  localIDs[k] = invalidLid;
241  res = IDNotPresent;
242  }
243  else if (g_0 < static_cast<GO> (N_R)) {
244  // The GID comes from the initial sequence of R processes.
245  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
246  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
247  }
248  else if (g_0 >= static_cast<GO> (N_R)) {
249  // The GID comes from the remaining P-R processes.
250  const GO g_R = g_0 - static_cast<GO> (N_R);
251  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
252  localIDs[k] = static_cast<int> (g_R % N_L);
253  }
254 #ifdef HAVE_TPETRA_DEBUG
255  else {
256  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
257  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
258  "should never get here. "
259  "Please report this bug to the Tpetra developers.");
260  }
261 #endif // HAVE_TPETRA_DEBUG
262  }
263  }
264  else { // don't compute local indices
265  for (size_type k = 0; k < numGids; ++k) {
266  const GO g_0 = globalIDs[k] - g_min;
267  // The first test is a little strange just in case GO is
268  // unsigned. Compilers raise a warning on tests like "x <
269  // 0" if x is unsigned, but don't usually raise a warning if
270  // the expression is a bit more complicated than that.
271  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
272  nodeIDs[k] = -1;
273  res = IDNotPresent;
274  }
275  else if (g_0 < static_cast<GO> (N_R)) {
276  // The GID comes from the initial sequence of R processes.
277  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
278  }
279  else if (g_0 >= static_cast<GO> (N_R)) {
280  // The GID comes from the remaining P-R processes.
281  const GO g_R = g_0 - static_cast<GO> (N_R);
282  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
283  }
284 #ifdef HAVE_TPETRA_DEBUG
285  else {
286  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
287  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
288  "should never get here. "
289  "Please report this bug to the Tpetra developers.");
290  }
291 #endif // HAVE_TPETRA_DEBUG
292  }
293  }
294  return res;
295  }
296 
297  template<class LO, class GO, class NT>
300  {
301  using Teuchos::arcp;
302  using Teuchos::gatherAll;
303  using Teuchos::RCP;
304 
305  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
306 
307  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
308  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
309  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
310  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
311 
312  const int numProcs = comm->getSize ();
313 
314  // Make room for the min global ID on each process, plus one
315  // entry at the end for the "max cap."
316  allMinGIDs_ = arcp<GO> (numProcs + 1);
317  // Get my process' min global ID.
318  GO minMyGID = map.getMinGlobalIndex ();
319  // Gather all of the min global IDs into the first numProcs
320  // entries of allMinGIDs_.
321 
322  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
323  //
324  // The purpose of this giant hack is that gatherAll appears to
325  // interpret the "receive count" argument differently than
326  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
327  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
328  // which could relate either to this, or to OpenMPI.
329 #ifdef HAVE_TPETRACORE_MPI
330  MPI_Datatype rawMpiType = MPI_INT;
331  bool useRawMpi = true;
332  if (typeid (GO) == typeid (int)) {
333  rawMpiType = MPI_INT;
334  } else if (typeid (GO) == typeid (long)) {
335  rawMpiType = MPI_LONG;
336  } else {
337  useRawMpi = false;
338  }
339  if (useRawMpi) {
340  using Teuchos::rcp_dynamic_cast;
341  using Teuchos::MpiComm;
342  RCP<const MpiComm<int> > mpiComm =
343  rcp_dynamic_cast<const MpiComm<int> > (comm);
344  // It could be a SerialComm instead, even in an MPI build, so
345  // be sure to check.
346  if (! comm.is_null ()) {
347  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
348  const int err =
349  MPI_Allgather (&minMyGID, 1, rawMpiType,
350  allMinGIDs_.getRawPtr (), 1, rawMpiType,
351  rawMpiComm);
352  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
353  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
354  } else {
355  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
356  }
357  } else {
358  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
359  }
360 #else // NOT HAVE_TPETRACORE_MPI
361  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
362 #endif // HAVE_TPETRACORE_MPI
363  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
364 
365  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
366 
367  // Put the max cap at the end. Adding one lets us write loops
368  // over the global IDs with the usual strict less-than bound.
369  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
370  + Teuchos::OrdinalTraits<GO>::one ();
371  }
372 
373  template<class LO, class GO, class NT>
374  std::string
376  {
377  std::ostringstream os;
378  os << "DistributedContiguousDirectory"
379  << "<" << Teuchos::TypeNameTraits<LO>::name ()
380  << ", " << Teuchos::TypeNameTraits<GO>::name ()
381  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
382  return os.str ();
383  }
384 
385  template<class LO, class GO, class NT>
389  const Teuchos::ArrayView<const GO> &globalIDs,
390  const Teuchos::ArrayView<int> &nodeIDs,
391  const Teuchos::ArrayView<LO> &localIDs,
392  const bool computeLIDs) const
393  {
394  using Teuchos::Array;
395  using Teuchos::ArrayRCP;
396  using Teuchos::ArrayView;
397  using Teuchos::as;
398  using Teuchos::Comm;
399  using Teuchos::RCP;
400 
402  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
403  const int myRank = comm->getRank ();
404 
405  // Map is on one process or is locally replicated.
406  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
407  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
408  typename ArrayView<const GO>::iterator gidIter;
409  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
410  if (map.isNodeGlobalElement (*gidIter)) {
411  *procIter++ = myRank;
412  if (computeLIDs) {
413  *lidIter++ = map.getLocalElement (*gidIter);
414  }
415  }
416  else {
417  // Advance the pointers, leaving these values set to invalid
418  procIter++;
419  if (computeLIDs) {
420  lidIter++;
421  }
422  res = IDNotPresent;
423  }
424  }
425  return res;
426  }
427 
428  template<class LO, class GO, class NT>
432  const Teuchos::ArrayView<const GO> &globalIDs,
433  const Teuchos::ArrayView<int> &nodeIDs,
434  const Teuchos::ArrayView<LO> &localIDs,
435  const bool computeLIDs) const
436  {
437  using Teuchos::Array;
438  using Teuchos::ArrayRCP;
439  using Teuchos::ArrayView;
440  using Teuchos::as;
441  using Teuchos::Comm;
442  using Teuchos::RCP;
443 
444  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
445  const int numProcs = comm->getSize ();
446  const global_size_t nOverP = map.getGlobalNumElements () / numProcs;
447  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
449 
450  // Map is distributed but contiguous.
451  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
452  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
453  typename ArrayView<const GO>::iterator gidIter;
454  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
455  LO LID = LINVALID; // Assume not found until proven otherwise
456  int image = -1;
457  GO GID = *gidIter;
458  // Guess uniform distribution and start a little above it
459  // TODO: replace by a binary search
460  int curRank;
461  { // We go through all this trouble to avoid overflow and
462  // signed / unsigned casting mistakes (that were made in
463  // previous versions of this code).
464  const GO one = as<GO> (1);
465  const GO two = as<GO> (2);
466  const GO nOverP_GID = as<GO> (nOverP);
467  const GO lowerBound = GID / std::max(nOverP_GID, one) + two;
468  curRank = as<int>(std::min(lowerBound, as<GO>(numProcs - 1)));
469  }
470  bool found = false;
471  while (curRank >= 0 && curRank < numProcs) {
472  if (allMinGIDs_[curRank] <= GID) {
473  if (GID < allMinGIDs_[curRank + 1]) {
474  found = true;
475  break;
476  }
477  else {
478  curRank++;
479  }
480  }
481  else {
482  curRank--;
483  }
484  }
485  if (found) {
486  image = curRank;
487  LID = as<LO> (GID - allMinGIDs_[image]);
488  }
489  else {
490  res = IDNotPresent;
491  }
492  *procIter++ = image;
493  if (computeLIDs) {
494  *lidIter++ = LID;
495  }
496  }
497  return res;
498  }
499 
500  template<class LO, class GO, class NT>
503  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
504  locallyOneToOne_ (true), // to be revised below
505  useHashTables_ (false) // to be revised below
506  {
507  initialize (map, Teuchos::null);
508  }
509 
510  template<class LO, class GO, class NT>
513  const tie_break_type& tie_break) :
514  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
515  locallyOneToOne_ (true), // to be revised below
516  useHashTables_ (false) // to be revised below
517  {
518  initialize (map, Teuchos::ptrFromRef (tie_break));
519  }
520 
521  template<class LO, class GO, class NT>
522  void
524  initialize (const map_type& map,
525  Teuchos::Ptr<const tie_break_type> tie_break)
526  {
527  using Teuchos::arcp;
528  using Teuchos::Array;
529  using Teuchos::ArrayRCP;
530  using Teuchos::ArrayView;
531  using Teuchos::as;
532  using Teuchos::RCP;
533  using Teuchos::rcp;
534  using Teuchos::typeName;
535  using Teuchos::TypeNameTraits;
536  using std::cerr;
537  using std::endl;
538  typedef Array<int>::size_type size_type;
539 
540  // This class' implementation of getEntriesImpl() currently
541  // encodes the following assumptions:
542  //
543  // 1. global_size_t >= GO
544  // 2. global_size_t >= int
545  // 3. global_size_t >= LO
546  //
547  // We check these assumptions here.
548  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
549  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
550  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
551  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
552  << ".");
553  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
554  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
555  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
556  << sizeof(int) << ".");
557  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
558  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
559  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
560  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
561  << ".");
562 
563  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
564  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
565  const GO minAllGID = map.getMinAllGlobalIndex ();
566  const GO maxAllGID = map.getMaxAllGlobalIndex ();
567 
568  // The "Directory Map" (see below) will have a range of elements
569  // from the minimum to the maximum GID of the user Map, and a
570  // minimum GID of minAllGID from the user Map. It doesn't
571  // actually have to store all those entries, though do beware of
572  // calling getNodeElementList on it (see Bug 5822).
573  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
574 
575  // We can't afford to replicate the whole directory on each
576  // process, so create the "Directory Map", a uniform contiguous
577  // Map that describes how we will distribute the directory over
578  // processes.
579  //
580  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
581  // the index base. The index base should be separate from the
582  // minimum GID.
583  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
584  GloballyDistributed));
585  // The number of Directory elements that my process owns.
586  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
587 
588  // Fix for Bug 5822: If the input Map is "sparse," that is if
589  // the difference between the global min and global max GID is
590  // much larger than the global number of elements in the input
591  // Map, then it's possible that the Directory Map might have
592  // many more entries than the input Map on this process. This
593  // can cause memory scalability issues. In that case, we switch
594  // from the array-based implementation of Directory storage to
595  // the hash table - based implementation. We don't use hash
596  // tables all the time, because they are slower in the common
597  // case of a nonsparse Map.
598  //
599  // NOTE: This is a per-process decision. Some processes may use
600  // array-based storage, whereas others may use hash table -
601  // based storage.
602 
603  // A hash table takes a constant factor more space, more or
604  // less, than an array. Thus, it's not worthwhile, even in
605  // terms of memory usage, always to use a hash table.
606  // Furthermore, array lookups are faster than hash table
607  // lookups, so it may be worthwhile to use an array even if it
608  // takes more space. The "sparsity threshold" governs when to
609  // switch to a hash table - based implementation.
610  const size_t inverseSparsityThreshold = 10;
611  useHashTables_ =
612  (dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements());
613 
614  // Get list of process IDs that own the directory entries for the
615  // Map GIDs. These will be the targets of the sends that the
616  // Distributor will do.
617  const int myRank = comm->getRank ();
618  const size_t numMyEntries = map.getNodeNumElements ();
619  Array<int> sendImageIDs (numMyEntries);
620  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
621  // An ID not present in this lookup indicates that it lies outside
622  // of the range [minAllGID,maxAllGID] (from map_). this means
623  // something is wrong with map_, our fault.
624  const LookupStatus lookupStatus =
625  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
626  TEUCHOS_TEST_FOR_EXCEPTION(
627  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
628  << " constructor: the Directory Map could not find out where one or "
629  "more of my Map's indices should go. The input to getRemoteIndexList "
630  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
631  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
632  "the following entries on the calling process " <<
633  map.getComm ()->getRank () << ": " <<
634  Teuchos::toString (map.getNodeElementList ()) << ", and has "
635  << map.getGlobalNumElements () << " total global indices in ["
636  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
637  << "]. The Directory Map has "
638  << directoryMap_->getGlobalNumElements () << " total global indices in "
639  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
640  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
641  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
642  directoryMap_->getMaxGlobalIndex () << "]. "
643  "This probably means there is a bug in Map or Directory. "
644  "Please report this bug to the Tpetra developers.");
645 
646  // Initialize the distributor using the list of process IDs to
647  // which to send. We'll use the distributor to send out triples
648  // of (GID, process ID, LID). We're sending the entries to the
649  // processes that the Directory Map says should own them, which is
650  // why we called directoryMap_->getRemoteIndexList() above.
651  Distributor distor (comm);
652  const size_t numReceives = distor.createFromSends (sendImageIDs);
653 
654  // NOTE (mfh 21 Mar 2012) The following code assumes that
655  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
656  //
657  // Create and fill buffer of (GID, PID, LID) triples to send
658  // out. We pack the (GID, PID, LID) triples into a single Array
659  // of GO, casting the PID from int to GO and the LID from LO to
660  // GO as we do so.
661  //
662  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
663  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
664  // required, and the latter is generally the case, but we should
665  // still check for this.
666  const int packetSize = 3; // We're sending triples, so packet size is 3.
667  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
668  {
669  size_type exportIndex = 0;
670  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
671  exportEntries[exportIndex++] = myGlobalEntries[i];
672  exportEntries[exportIndex++] = as<GO> (myRank);
673  exportEntries[exportIndex++] = as<GO> (i);
674  }
675  }
676  // Buffer of data to receive. The Distributor figured out for
677  // us how many packets we're receiving, when we called its
678  // createFromSends() method to set up the distribution plan.
679  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
680 
681  // Distribute the triples of (GID, process ID, LID).
682  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
683 
684  // Unpack the redistributed data. Both implementations of
685  // Directory storage map from an LID in the Directory Map (which
686  // is the LID of the GID to store) to either a PID or an LID in
687  // the input Map. Each "packet" (contiguous chunk of
688  // importElements) contains a triple: (GID, PID, LID).
689  if (useHashTables_) {
690  // Create the hash tables. We know exactly how many elements
691  // to expect in each hash table. FixedHashTable's constructor
692  // currently requires all the keys and values at once, so we
693  // have to extract them in temporary arrays. It may be
694  // possible to rewrite FixedHashTable to use a "start fill" /
695  // "end fill" approach that avoids the temporary arrays, but
696  // we won't try that for now.
697 
698  // The constructors of Array and ArrayRCP that take a number
699  // of elements all initialize the arrays. Instead, allocate
700  // raw arrays, then hand them off to ArrayRCP, to avoid the
701  // initial unnecessary initialization without losing the
702  // benefit of exception safety (and bounds checking, in a
703  // debug build).
704  LO* tableKeysRaw = NULL;
705  LO* tableLidsRaw = NULL;
706  int* tablePidsRaw = NULL;
707  try {
708  tableKeysRaw = new LO [numReceives];
709  tableLidsRaw = new LO [numReceives];
710  tablePidsRaw = new int [numReceives];
711  } catch (...) {
712  if (tableKeysRaw != NULL) {
713  delete [] tableKeysRaw;
714  }
715  if (tableLidsRaw != NULL) {
716  delete [] tableLidsRaw;
717  }
718  if (tablePidsRaw != NULL) {
719  delete [] tablePidsRaw;
720  }
721  throw;
722  }
723  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
724  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
725  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
726 
727  if (tie_break.is_null ()) {
728  // Fill the temporary arrays of keys and values.
729  size_type importIndex = 0;
730  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
731  const GO curGID = importElements[importIndex++];
732  const LO curLID = directoryMap_->getLocalElement (curGID);
733  TEUCHOS_TEST_FOR_EXCEPTION(
734  curLID == LINVALID, std::logic_error,
735  Teuchos::typeName(*this) << " constructor: Incoming global index "
736  << curGID << " does not have a corresponding local index in the "
737  "Directory Map. Please report this bug to the Tpetra developers.");
738  tableKeys[i] = curLID;
739  tablePids[i] = importElements[importIndex++];
740  tableLids[i] = importElements[importIndex++];
741  }
742  // Set up the hash tables. The hash tables' constructor
743  // detects whether there are duplicates, so that we can set
744  // locallyOneToOne_.
745  typedef Kokkos::Device<typename NT::execution_space,
746  typename NT::memory_space> DT;
747  lidToPidTable_ =
748  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
749  tablePids ()));
750  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
751  lidToLidTable_ =
752  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
753  tableLids ()));
754  }
755  else { // tie_break is NOT null
756 
757  // For each directory Map LID received, collect all the
758  // corresponding (PID,LID) pairs. If the input Map is not
759  // one-to-one, corresponding directory Map LIDs will have
760  // more than one pair. In that case, we will use the
761  // TieBreak object to pick exactly one pair.
762  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
763  pair_table_type ownedPidLidPairs;
764 
765  // For each directory Map LID received, collect the zero or
766  // more input Map (PID,LID) pairs into ownedPidLidPairs.
767  size_type importIndex = 0;
768  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
769  const GO curGID = importElements[importIndex++];
770  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
771  TEUCHOS_TEST_FOR_EXCEPTION(
772  dirMapLid == LINVALID, std::logic_error,
773  Teuchos::typeName(*this) << " constructor: Incoming global index "
774  << curGID << " does not have a corresponding local index in the "
775  "Directory Map. Please report this bug to the Tpetra developers.");
776  tableKeys[i] = dirMapLid;
777  const int PID = importElements[importIndex++];
778  const int LID = importElements[importIndex++];
779 
780  // These may change below. We fill them in just to ensure
781  // that they won't have invalid values.
782  tablePids[i] = PID;
783  tableLids[i] = LID;
784 
785  // For every directory Map LID, we have to remember all
786  // (PID, LID) pairs. The TieBreak object will arbitrate
787  // between them in the loop below.
788  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
789  }
790 
791  // Use TieBreak to arbitrate between (PID,LID) pairs
792  // corresponding to each directory Map LID.
793  //
794  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
795  // as the directory Map LID?
796  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
797  // contiguous, but the directory map is. Need to set tablePids[i]
798  // and tableLids[i], so need to loop over numReceives (as that is
799  // how those arrays are allocated). FIXED
800 
801  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
802  const LO dirMapLid = tableKeys[i];
803  const std::vector<std::pair<int, LO> >& pidLidList =
804  ownedPidLidPairs[dirMapLid];
805  const size_t listLen = pidLidList.size();
806  if (listLen == 0) continue; // KDD This will never happen
807  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
808  if (listLen > 1) {
809  locallyOneToOne_ = false;
810  }
811  // If there is some (PID,LID) pair for the current input
812  // Map LID, then it makes sense to invoke the TieBreak
813  // object to arbitrate between the options. Even if
814  // there is only one (PID,LID) pair, we still want to
815  // give the TieBreak object a chance to do whatever it
816  // likes to do, in terms of side effects (e.g., track
817  // (PID,LID) pairs).
818  const size_type index =
819  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
820  pidLidList));
821  tablePids[i] = pidLidList[index].first;
822  tableLids[i] = pidLidList[index].second;
823  }
824 
825  // Set up the hash tables.
826  typedef Kokkos::Device<typename NT::execution_space,
827  typename NT::memory_space> DT;
828  lidToPidTable_ =
829  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
830  tablePids ()));
831  lidToLidTable_ =
832  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
833  tableLids ()));
834  }
835  }
836  else {
837  if (tie_break.is_null ()) {
838  // Use array-based implementation of Directory storage.
839  // Allocate these arrays and fill them with invalid values,
840  // in case the input Map's GID list is sparse (i.e., does
841  // not populate all GIDs from minAllGID to maxAllGID).
842  PIDs_ = arcp<int> (dir_numMyEntries);
843  std::fill (PIDs_.begin (), PIDs_.end (), -1);
844  LIDs_ = arcp<LO> (dir_numMyEntries);
845  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
846  // Fill in the arrays with PIDs resp. LIDs.
847  size_type importIndex = 0;
848  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
849  const GO curGID = importElements[importIndex++];
850  const LO curLID = directoryMap_->getLocalElement (curGID);
851  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
852  Teuchos::typeName(*this) << " constructor: Incoming global index "
853  << curGID << " does not have a corresponding local index in the "
854  "Directory Map. Please report this bug to the Tpetra developers.");
855 
856  // If PIDs_[curLID] is not -1, then curGID is a duplicate
857  // on the calling process, so the Directory is not locally
858  // one-to-one.
859  if (PIDs_[curLID] != -1) {
860  locallyOneToOne_ = false;
861  }
862  PIDs_[curLID] = importElements[importIndex++];
863  LIDs_[curLID] = importElements[importIndex++];
864  }
865  }
866  else {
867  PIDs_ = arcp<int> (dir_numMyEntries);
868  LIDs_ = arcp<LO> (dir_numMyEntries);
869  std::fill (PIDs_.begin (), PIDs_.end (), -1);
870 
871  // All received (PID, LID) pairs go into ownedPidLidPairs.
872  // This is a map from the directory Map's LID to the (PID,
873  // LID) pair (where the latter LID comes from the input Map,
874  // not the directory Map). If the input Map is not
875  // one-to-one, corresponding LIDs will have
876  // ownedPidLidPairs[curLID].size() > 1. In that case, we
877  // will use the TieBreak object to pick exactly one pair.
878  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
879  size_type importIndex = 0;
880  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
881  const GO GID = importElements[importIndex++];
882  const int PID = importElements[importIndex++];
883  const LO LID = importElements[importIndex++];
884 
885  const LO dirMapLid = directoryMap_->getLocalElement (GID);
886  TEUCHOS_TEST_FOR_EXCEPTION(
887  dirMapLid == LINVALID, std::logic_error,
888  Teuchos::typeName(*this) << " constructor: Incoming global index "
889  << GID << " does not have a corresponding local index in the "
890  "Directory Map. Please report this bug to the Tpetra developers.");
891  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
892  }
893 
894  // Use TieBreak to arbitrate between (PID,LID) pairs
895  // corresponding to each directory Map LID.
896  //
897  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
898  // as the directory Map LID?
899  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
900  // contiguous. Loop over all ownedPidLidPairs; skip those that have
901  // empty lists. FIXED
902 
903  for (size_t i = 0; i < dir_numMyEntries; ++i) {
904  const std::vector<std::pair<int, LO> >& pidLidList =
905  ownedPidLidPairs[i];
906  const size_t listLen = pidLidList.size();
907  if (listLen == 0) continue; // KDD will happen for GIDs not in
908  // KDD the user's source map
909  const LO dirMapLid = static_cast<LO> (i);
910  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
911  if (listLen > 1) {
912  locallyOneToOne_ = false;
913  }
914  // If there is some (PID,LID) pair for the current input
915  // Map LID, then it makes sense to invoke the TieBreak
916  // object to arbitrate between the options. Even if
917  // there is only one (PID,LID) pair, we still want to
918  // give the TieBreak object a chance to do whatever it
919  // likes to do, in terms of side effects (e.g., track
920  // (PID,LID) pairs).
921  const size_type index =
922  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
923  pidLidList));
924  PIDs_[i] = pidLidList[index].first;
925  LIDs_[i] = pidLidList[index].second;
926  }
927  }
928  }
929  }
930 
931  template<class LO, class GO, class NT>
932  std::string
934  {
935  std::ostringstream os;
936  os << "DistributedNoncontiguousDirectory"
937  << "<" << Teuchos::TypeNameTraits<LO>::name ()
938  << ", " << Teuchos::TypeNameTraits<GO>::name ()
939  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
940  return os.str ();
941  }
942 
943  template<class LO, class GO, class NT>
947  const Teuchos::ArrayView<const GO> &globalIDs,
948  const Teuchos::ArrayView<int> &nodeIDs,
949  const Teuchos::ArrayView<LO> &localIDs,
950  const bool computeLIDs) const
951  {
952  using Teuchos::Array;
953  using Teuchos::ArrayRCP;
954  using Teuchos::ArrayView;
955  using Teuchos::as;
956  using Teuchos::RCP;
957  using std::cerr;
958  using std::endl;
959  typedef typename Array<GO>::size_type size_type;
960 
961  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
962  const size_t numEntries = globalIDs.size ();
963  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
965 
966  //
967  // Set up directory structure.
968  //
969 
970  // If we're computing LIDs, we also have to include them in each
971  // packet, along with the GID and process ID.
972  const int packetSize = computeLIDs ? 3 : 2;
973 
974  // For data distribution, we use: Surprise! A Distributor!
975  Distributor distor (comm);
976 
977  // Get directory locations for the requested list of entries.
978  Array<int> dirImages (numEntries);
979  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
980  // Check for unfound globalIDs and set corresponding nodeIDs to -1
981  size_t numMissing = 0;
982  if (res == IDNotPresent) {
983  for (size_t i=0; i < numEntries; ++i) {
984  if (dirImages[i] == -1) {
985  nodeIDs[i] = -1;
986  if (computeLIDs) {
987  localIDs[i] = LINVALID;
988  }
989  numMissing++;
990  }
991  }
992  }
993 
994  Array<GO> sendGIDs;
995  Array<int> sendImages;
996  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
997  const size_type numSends = sendGIDs.size ();
998 
999  //
1000  // mfh 13 Nov 2012:
1001  //
1002  // The code below temporarily stores LO, GO, and int values in
1003  // an array of global_size_t. If one of the signed types (LO
1004  // and GO should both be signed) happened to be -1 (or some
1005  // negative number, but -1 is the one that came up today), then
1006  // conversion to global_size_t will result in a huge
1007  // global_size_t value, and thus conversion back may overflow.
1008  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1009  // all along.)
1010  //
1011  // The overflow normally would not be a problem, since it would
1012  // just go back to -1 again. However, Teuchos::as does range
1013  // checking on conversions in a debug build, so it throws an
1014  // exception (std::range_error) in this case. Range checking is
1015  // generally useful in debug mode, so we don't want to disable
1016  // this behavior globally.
1017  //
1018  // We solve this problem by forgoing use of Teuchos::as for the
1019  // conversions below from LO, GO, or int to global_size_t, and
1020  // the later conversions back from global_size_t to LO, GO, or
1021  // int.
1022  //
1023  // I've recorded this discussion as Bug 5760.
1024  //
1025 
1026  // global_size_t >= GO
1027  // global_size_t >= size_t >= int
1028  // global_size_t >= size_t >= LO
1029  // Therefore, we can safely store all of these in a global_size_t
1030  Array<global_size_t> exports (packetSize * numSends);
1031  {
1032  // Packet format:
1033  // - If computing LIDs: (GID, PID, LID)
1034  // - Otherwise: (GID, PID)
1035  //
1036  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1037 
1038  // Current position to which to write in exports array. If
1039  // sending pairs, we pack the (GID, PID) pair for gid =
1040  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1041  // triples, we pack the (GID, PID, LID) pair for gid =
1042  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1043  size_type exportsIndex = 0;
1044 
1045  if (useHashTables_) {
1046  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1047  const GO curGID = sendGIDs[gidIndex];
1048  // Don't use as() here (see above note).
1049  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1050  const LO curLID = directoryMap_->getLocalElement (curGID);
1051  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1052  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1053  "Map's global index " << curGID << " does not have a corresponding "
1054  "local index. Please report this bug to the Tpetra developers.");
1055  // Don't use as() here (see above note).
1056  exports[exportsIndex++] = static_cast<global_size_t> (lidToPidTable_->get (curLID));
1057  if (computeLIDs) {
1058  // Don't use as() here (see above note).
1059  exports[exportsIndex++] = static_cast<global_size_t> (lidToLidTable_->get (curLID));
1060  }
1061  }
1062  } else {
1063  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1064  const GO curGID = sendGIDs[gidIndex];
1065  // Don't use as() here (see above note).
1066  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1067  const LO curLID = directoryMap_->getLocalElement (curGID);
1068  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1069  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1070  "Map's global index " << curGID << " does not have a corresponding "
1071  "local index. Please report this bug to the Tpetra developers.");
1072  // Don't use as() here (see above note).
1073  exports[exportsIndex++] = static_cast<global_size_t> (PIDs_[curLID]);
1074  if (computeLIDs) {
1075  // Don't use as() here (see above note).
1076  exports[exportsIndex++] = static_cast<global_size_t> (LIDs_[curLID]);
1077  }
1078  }
1079  }
1080 
1081  TEUCHOS_TEST_FOR_EXCEPTION(
1082  exportsIndex > exports.size (), std::logic_error,
1083  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process " <<
1084  comm->getRank () << ", exportsIndex = " << exportsIndex <<
1085  " > exports.size() = " << exports.size () <<
1086  ". Please report this bug to the Tpetra developers.");
1087  }
1088 
1089  TEUCHOS_TEST_FOR_EXCEPTION(
1090  numEntries < numMissing, std::logic_error,
1091  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process "
1092  << comm->getRank () << ", numEntries = " << numEntries
1093  << " < numMissing = " << numMissing
1094  << ". Please report this bug to the Tpetra developers.");
1095 
1096  //
1097  // mfh 13 Nov 2012: See note above on conversions between
1098  // global_size_t and LO, GO, or int.
1099  //
1100  const size_t numRecv = numEntries - numMissing;
1101 
1102  {
1103  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1104  const size_t requiredImportLen = numRecv * packetSize;
1105  const int myRank = comm->getRank ();
1106  TEUCHOS_TEST_FOR_EXCEPTION(
1107  importLen < requiredImportLen, std::logic_error,
1108  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1109  "On Process " << myRank << ": The 'imports' array must have length "
1110  "at least " << requiredImportLen << ", but its actual length is " <<
1111  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1112  packetSize << ", numEntries (# GIDs): " << numEntries <<
1113  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1114  << distor.getTotalReceiveLength () << ". " << std::endl <<
1115  "Distributor description: " << distor.description () << ". "
1116  << std::endl <<
1117  "Please report this bug to the Tpetra developers.");
1118  }
1119 
1120  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1121  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1122  // with communication, by splitting this call into doPosts and
1123  // doWaits. The code is still correct in this form, however.
1124  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1125 
1126  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1127  Array<GO> offset (numEntries); // permutation array (sort2 output)
1128  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1129  offset[ii] = ii;
1130  }
1131  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1132 
1133  size_t importsIndex = 0;
1134  //typename Array<global_size_t>::iterator ptr = imports.begin();
1135  typedef typename Array<GO>::iterator IT;
1136 
1137  // we know these conversions are in range, because we loaded this data
1138  for (size_t i = 0; i < numRecv; ++i) {
1139  // Don't use as() here (see above note).
1140  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1141  std::pair<IT, IT> p1 = std::equal_range (sortedIDs.begin(), sortedIDs.end(), curGID);
1142  if (p1.first != p1.second) {
1143  const size_t j = p1.first - sortedIDs.begin();
1144  // Don't use as() here (see above note).
1145  nodeIDs[offset[j]] = static_cast<int> (imports[importsIndex++]);
1146  if (computeLIDs) {
1147  // Don't use as() here (see above note).
1148  localIDs[offset[j]] = static_cast<LO> (imports[importsIndex++]);
1149  }
1150  if (nodeIDs[offset[j]] == -1) {
1151  res = IDNotPresent;
1152  }
1153  }
1154  }
1155 
1156  TEUCHOS_TEST_FOR_EXCEPTION(
1157  static_cast<size_t> (importsIndex) > static_cast<size_t> (imports.size ()),
1158  std::logic_error,
1159  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1160  "On Process " << comm->getRank () << ": importsIndex = " <<
1161  importsIndex << " > imports.size() = " << imports.size () << ". "
1162  "numRecv: " << numRecv << ", packetSize: " << packetSize << ", "
1163  "numEntries (# GIDs): " << numEntries << ", numMissing: " << numMissing
1164  << ": distor.getTotalReceiveLength(): "
1165  << distor.getTotalReceiveLength () << ". Please report this bug to "
1166  "the Tpetra developers.");
1167 
1168  return res;
1169  }
1170 
1171 
1172  template<class LO, class GO, class NT>
1173  bool
1175  isOneToOne (const Teuchos::Comm<int>& comm) const
1176  {
1177  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1178  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1179  int gbl121 = 0;
1180  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1181  Teuchos::outArg (gbl121));
1182  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1183  }
1184  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1185  }
1186  } // namespace Details
1187 } // namespace Tpetra
1188 
1189 //
1190 // Explicit instantiation macro
1191 //
1192 // Must be expanded from within the Tpetra::Details namespace!
1193 //
1194 #define TPETRA_DIRECTORY_IMPL_INSTANT(LO,GO,NODE) \
1195  template class Directory< LO , GO , NODE >; \
1196  template class ReplicatedDirectory< LO , GO , NODE >; \
1197  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1198  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1199  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1200 
1201 #endif // __Tpetra_DirectoryImpl_def_hpp
Interface for breaking ties in ownership.
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
Interface for breaking ties in ownership.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
Return a one-line description of this object.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
bool isUniform() const
Whether the range of global indices is uniform.
Implementation of Directory for a distributed noncontiguous Map.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Implementation of Directory for a distributed contiguous Map.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory&#39;s input Map is (globally) one to one.
ReplicatedDirectory()
Constructor (that takes no arguments).
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
A one-line human-readable description of this object.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A parallel distribution of indices over processes.
std::string description() const
A one-line human-readable description of this object.
Declaration of implementation details of Tpetra::Directory.
Implementation of Directory for a contiguous, uniformly distributed Map.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
global_size_t getGlobalNumElements() const
The number of elements in this Map.