Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
41 #define TPETRA_DIRECTORYIMPL_DEF_HPP
42 
45 
46 #include "Tpetra_Distributor.hpp"
47 #include "Tpetra_Map.hpp"
48 #include "Tpetra_TieBreak.hpp"
49 #include "Tpetra_Util.hpp"
50 #include "Tpetra_Details_FixedHashTable.hpp"
51 #include "Teuchos_Comm.hpp"
52 #include <memory>
53 #include <sstream>
54 
55 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
56 #ifdef HAVE_TPETRACORE_MPI
57 # include <mpi.h>
58 #endif // HAVE_TPETRACORE_MPI
59 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
60 
61 namespace Tpetra {
62  namespace Details {
63  template<class LO, class GO, class NT>
66  getEntries (const map_type& map,
67  const Teuchos::ArrayView<const GO> &globalIDs,
68  const Teuchos::ArrayView<int> &nodeIDs,
69  const Teuchos::ArrayView<LO> &localIDs,
70  const bool computeLIDs) const
71  {
72  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
73  // all have the same size, before modifying any output arguments.
74  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
75  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
76  "Output arrays do not have the right sizes. nodeIDs.size() = "
77  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
78  << ".");
79  TEUCHOS_TEST_FOR_EXCEPTION(
80  computeLIDs && localIDs.size() != globalIDs.size(),
81  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
82  "Output array do not have the right sizes. localIDs.size() = "
83  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
84  << ".");
85 
86  // Initially, fill nodeIDs and localIDs (if applicable) with
87  // invalid values. The "invalid" process ID is -1 (this means
88  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
89  // "invalid" process ID); the invalid local ID comes from
90  // OrdinalTraits.
91  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
92  if (computeLIDs) {
93  std::fill (localIDs.begin(), localIDs.end(),
94  Teuchos::OrdinalTraits<LO>::invalid ());
95  }
96  // Actually do the work.
97  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
98  }
99 
100 
101  template<class LO, class GO, class NT>
104  numProcs_ (map.getComm ()->getSize ())
105  {}
106 
107 
108  template<class LO, class GO, class NT>
109  bool
111  isOneToOne (const Teuchos::Comm<int>& /* comm */) const
112  {
113  // A locally replicated Map is one-to-one only if there is no
114  // replication, that is, only if the Map's communicator only has
115  // one process.
116  return (numProcs_ == 1);
117  }
118 
119 
120  template<class LO, class GO, class NT>
121  std::string
123  {
124  std::ostringstream os;
125  os << "ReplicatedDirectory"
126  << "<" << Teuchos::TypeNameTraits<LO>::name ()
127  << ", " << Teuchos::TypeNameTraits<GO>::name ()
128  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
129  return os.str ();
130  }
131 
132 
133  template<class LO, class GO, class NT>
135  ContiguousUniformDirectory (const map_type& map)
136  {
137  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
138  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
139  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
140  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
141  }
142 
143 
144  template<class LO, class GO, class NT>
145  std::string
147  {
148  std::ostringstream os;
149  os << "ContiguousUniformDirectory"
150  << "<" << Teuchos::TypeNameTraits<LO>::name ()
151  << ", " << Teuchos::TypeNameTraits<GO>::name ()
152  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
153  return os.str ();
154  }
155 
156 
157  template<class LO, class GO, class NT>
161  const Teuchos::ArrayView<const GO> &globalIDs,
162  const Teuchos::ArrayView<int> &nodeIDs,
163  const Teuchos::ArrayView<LO> &localIDs,
164  const bool computeLIDs) const
165  {
166  using Teuchos::Comm;
167  using Teuchos::RCP;
168  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
169  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
171 
172  RCP<const Comm<int> > comm = map.getComm ();
173  const GO g_min = map.getMinAllGlobalIndex ();
174 
175  // Let N_G be the global number of elements in the Map,
176  // and P be the number of processes in its communicator.
177  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
178  //
179  // The first R processes own N_L+1 elements.
180  // The remaining P-R processes own N_L elements.
181  //
182  // Let g be the current GID, g_min be the global minimum GID,
183  // and g_0 = g - g_min. If g is a valid GID in this Map, then
184  // g_0 is in [0, N_G - 1].
185  //
186  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
187  // the rank of the process that owns g is floor(g_0 / (N_L +
188  // 1)), and its corresponding local index on that process is g_0
189  // mod (N_L + 1).
190  //
191  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
192  // and g_0 >= R*(N_L + 1), then the rank of the process that
193  // owns g is then R + floor(g_R / N_L), and its corresponding
194  // local index on that process is g_R mod N_L.
195 
196  const size_type N_G =
197  static_cast<size_type> (map.getGlobalNumElements ());
198  const size_type P = static_cast<size_type> (comm->getSize ());
199  const size_type N_L = N_G / P;
200  const size_type R = N_G - N_L * P; // N_G mod P
201  const size_type N_R = R * (N_L + static_cast<size_type> (1));
202 
203 #ifdef HAVE_TPETRA_DEBUG
204  TEUCHOS_TEST_FOR_EXCEPTION(
205  N_G != P*N_L + R, std::logic_error,
206  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
207  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
208  << " = " << P*N_L + R << ". "
209  "Please report this bug to the Tpetra developers.");
210 #endif // HAVE_TPETRA_DEBUG
211 
212  const size_type numGids = globalIDs.size (); // for const loop bound
213  // Avoid signed/unsigned comparisons below, in case GO is
214  // unsigned. (Integer literals are generally signed.)
215  const GO ONE = static_cast<GO> (1);
216 
217  if (computeLIDs) {
218  for (size_type k = 0; k < numGids; ++k) {
219  const GO g_0 = globalIDs[k] - g_min;
220 
221  // The first test is a little strange just in case GO is
222  // unsigned. Compilers raise a warning on tests like "x <
223  // 0" if x is unsigned, but don't usually raise a warning if
224  // the expression is a bit more complicated than that.
225  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
226  nodeIDs[k] = -1;
227  localIDs[k] = invalidLid;
228  res = IDNotPresent;
229  }
230  else if (g_0 < static_cast<GO> (N_R)) {
231  // The GID comes from the initial sequence of R processes.
232  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
233  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
234  }
235  else if (g_0 >= static_cast<GO> (N_R)) {
236  // The GID comes from the remaining P-R processes.
237  const GO g_R = g_0 - static_cast<GO> (N_R);
238  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
239  localIDs[k] = static_cast<int> (g_R % N_L);
240  }
241 #ifdef HAVE_TPETRA_DEBUG
242  else {
243  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
244  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
245  "should never get here. "
246  "Please report this bug to the Tpetra developers.");
247  }
248 #endif // HAVE_TPETRA_DEBUG
249  }
250  }
251  else { // don't compute local indices
252  for (size_type k = 0; k < numGids; ++k) {
253  const GO g_0 = globalIDs[k] - g_min;
254  // The first test is a little strange just in case GO is
255  // unsigned. Compilers raise a warning on tests like "x <
256  // 0" if x is unsigned, but don't usually raise a warning if
257  // the expression is a bit more complicated than that.
258  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
259  nodeIDs[k] = -1;
260  res = IDNotPresent;
261  }
262  else if (g_0 < static_cast<GO> (N_R)) {
263  // The GID comes from the initial sequence of R processes.
264  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
265  }
266  else if (g_0 >= static_cast<GO> (N_R)) {
267  // The GID comes from the remaining P-R processes.
268  const GO g_R = g_0 - static_cast<GO> (N_R);
269  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
270  }
271 #ifdef HAVE_TPETRA_DEBUG
272  else {
273  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
274  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
275  "should never get here. "
276  "Please report this bug to the Tpetra developers.");
277  }
278 #endif // HAVE_TPETRA_DEBUG
279  }
280  }
281  return res;
282  }
283 
284  template<class LO, class GO, class NT>
286  DistributedContiguousDirectory (const map_type& map)
287  {
288  using Teuchos::arcp;
289  using Teuchos::gatherAll;
290  using Teuchos::RCP;
291 
292  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
293 
294  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
295  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
296  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
297  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
298 
299  const int numProcs = comm->getSize ();
300 
301  // Make room for the min global ID on each process, plus one
302  // entry at the end for the "max cap."
303  allMinGIDs_ = arcp<GO> (numProcs + 1);
304  // Get my process' min global ID.
305  GO minMyGID = map.getMinGlobalIndex ();
306  // Gather all of the min global IDs into the first numProcs
307  // entries of allMinGIDs_.
308 
309  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
310  //
311  // The purpose of this giant hack is that gatherAll appears to
312  // interpret the "receive count" argument differently than
313  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
314  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
315  // which could relate either to this, or to OpenMPI.
316 #ifdef HAVE_TPETRACORE_MPI
317  MPI_Datatype rawMpiType = MPI_INT;
318  bool useRawMpi = true;
319  if (typeid (GO) == typeid (int)) {
320  rawMpiType = MPI_INT;
321  } else if (typeid (GO) == typeid (long)) {
322  rawMpiType = MPI_LONG;
323  } else {
324  useRawMpi = false;
325  }
326  if (useRawMpi) {
327  using Teuchos::rcp_dynamic_cast;
328  using Teuchos::MpiComm;
329  RCP<const MpiComm<int> > mpiComm =
330  rcp_dynamic_cast<const MpiComm<int> > (comm);
331  // It could be a SerialComm instead, even in an MPI build, so
332  // be sure to check.
333  if (! comm.is_null ()) {
334  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
335  const int err =
336  MPI_Allgather (&minMyGID, 1, rawMpiType,
337  allMinGIDs_.getRawPtr (), 1, rawMpiType,
338  rawMpiComm);
339  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
340  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
341  } else {
342  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
343  }
344  } else {
345  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
346  }
347 #else // NOT HAVE_TPETRACORE_MPI
348  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
349 #endif // HAVE_TPETRACORE_MPI
350  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
351 
352  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
353 
354  // Put the max cap at the end. Adding one lets us write loops
355  // over the global IDs with the usual strict less-than bound.
356  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
357  + Teuchos::OrdinalTraits<GO>::one ();
358  }
359 
360  template<class LO, class GO, class NT>
361  std::string
363  {
364  std::ostringstream os;
365  os << "DistributedContiguousDirectory"
366  << "<" << Teuchos::TypeNameTraits<LO>::name ()
367  << ", " << Teuchos::TypeNameTraits<GO>::name ()
368  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
369  return os.str ();
370  }
371 
372  template<class LO, class GO, class NT>
376  const Teuchos::ArrayView<const GO> &globalIDs,
377  const Teuchos::ArrayView<int> &nodeIDs,
378  const Teuchos::ArrayView<LO> &localIDs,
379  const bool computeLIDs) const
380  {
381  using Teuchos::Array;
382  using Teuchos::ArrayRCP;
383  using Teuchos::ArrayView;
384  using Teuchos::as;
385  using Teuchos::Comm;
386  using Teuchos::RCP;
387 
389  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
390  const int myRank = comm->getRank ();
391 
392  // Map is on one process or is locally replicated.
393  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
394  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
395  typename ArrayView<const GO>::iterator gidIter;
396  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
397  if (map.isNodeGlobalElement (*gidIter)) {
398  *procIter++ = myRank;
399  if (computeLIDs) {
400  *lidIter++ = map.getLocalElement (*gidIter);
401  }
402  }
403  else {
404  // Advance the pointers, leaving these values set to invalid
405  procIter++;
406  if (computeLIDs) {
407  lidIter++;
408  }
409  res = IDNotPresent;
410  }
411  }
412  return res;
413  }
414 
415  template<class LO, class GO, class NT>
419  const Teuchos::ArrayView<const GO> &globalIDs,
420  const Teuchos::ArrayView<int> &nodeIDs,
421  const Teuchos::ArrayView<LO> &localIDs,
422  const bool computeLIDs) const
423  {
424  using Teuchos::Array;
425  using Teuchos::ArrayRCP;
426  using Teuchos::ArrayView;
427  using Teuchos::as;
428  using Teuchos::Comm;
429  using Teuchos::RCP;
430 
431  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
432  const int numProcs = comm->getSize ();
433  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
434  const GO noGIDsOnProc = std::numeric_limits<GO>::max();
436 
437  // Find the first initialized GID for search below
438  int firstProcWithGIDs;
439  for (firstProcWithGIDs = 0; firstProcWithGIDs < numProcs;
440  firstProcWithGIDs++) {
441  if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
442  }
443 
444  // If Map is empty, return invalid values for all requested lookups
445  // This case should not happen, as an empty Map is not considered
446  // Distributed.
447  if (firstProcWithGIDs == numProcs) {
448  // No entries in Map
449  res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
450  std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
451  if (computeLIDs)
452  std::fill(localIDs.begin(), localIDs.end(), LINVALID);
453  return res;
454  }
455 
456  const GO one = as<GO> (1);
457  const GO nOverP = as<GO> (map.getGlobalNumElements ()
458  / as<global_size_t>(numProcs - firstProcWithGIDs));
459 
460  // Map is distributed but contiguous.
461  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
462  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
463  typename ArrayView<const GO>::iterator gidIter;
464  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
465  LO LID = LINVALID; // Assume not found until proven otherwise
466  int image = -1;
467  GO GID = *gidIter;
468  // Guess uniform distribution (TODO: maybe replace by a binary search)
469  // We go through all this trouble to avoid overflow and
470  // signed / unsigned casting mistakes (that were made in
471  // previous versions of this code).
472  int curRank;
473  const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
474  curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
475 
476  // This while loop will stop because
477  // allMinGIDs_[np] == global num elements
478  while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
479 
480  bool badGID = false;
481  while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
482  curRank--;
483  while (curRank >= firstProcWithGIDs &&
484  allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
485  }
486  if (curRank < firstProcWithGIDs) {
487  // GID is lower than all GIDs in map
488  badGID = true;
489  }
490  else if (curRank == numProcs) {
491  // GID is higher than all GIDs in map
492  badGID = true;
493  }
494  else {
495  // we have the lower bound; now limit from above
496  int above = curRank + 1;
497  while (allMinGIDs_[above] == noGIDsOnProc) above++;
498 
499  while (GID >= allMinGIDs_[above]) {
500  curRank = above;
501  if (curRank == numProcs) {
502  // GID is higher than all GIDs in map
503  badGID = true;
504  break;
505  }
506  above++;
507  while (allMinGIDs_[above] == noGIDsOnProc) above++;
508  }
509  }
510 
511  if (!badGID) {
512  image = curRank;
513  LID = as<LO> (GID - allMinGIDs_[image]);
514  }
515  else {
516  res = IDNotPresent;
517  }
518  *procIter++ = image;
519  if (computeLIDs) {
520  *lidIter++ = LID;
521  }
522  }
523  return res;
524  }
525 
526  template<class LO, class GO, class NT>
528  DistributedNoncontiguousDirectory (const map_type& map) :
529  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
530  locallyOneToOne_ (true), // to be revised below
531  useHashTables_ (false) // to be revised below
532  {
533  initialize (map, Teuchos::null);
534  }
535 
536  template<class LO, class GO, class NT>
537  DistributedNoncontiguousDirectory<LO, GO, NT>::
538  DistributedNoncontiguousDirectory (const map_type& map,
539  const tie_break_type& tie_break) :
540  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
541  locallyOneToOne_ (true), // to be revised below
542  useHashTables_ (false) // to be revised below
543  {
544  initialize (map, Teuchos::ptrFromRef (tie_break));
545  }
546 
547  template<class LO, class GO, class NT>
548  void
549  DistributedNoncontiguousDirectory<LO, GO, NT>::
550  initialize (const map_type& map,
551  Teuchos::Ptr<const tie_break_type> tie_break)
552  {
553  using Teuchos::arcp;
554  using Teuchos::Array;
555  using Teuchos::ArrayRCP;
556  using Teuchos::ArrayView;
557  using Teuchos::as;
558  using Teuchos::RCP;
559  using Teuchos::rcp;
560  using Teuchos::typeName;
561  using Teuchos::TypeNameTraits;
562  using std::cerr;
563  using std::endl;
564  typedef Array<int>::size_type size_type;
565 
566  // This class' implementation of getEntriesImpl() currently
567  // encodes the following assumptions:
568  //
569  // 1. global_size_t >= GO
570  // 2. global_size_t >= int
571  // 3. global_size_t >= LO
572  //
573  // We check these assumptions here.
574  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
575  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
576  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
577  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
578  << ".");
579  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
580  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
581  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
582  << sizeof(int) << ".");
583  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
584  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
585  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
586  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
587  << ".");
588 
589  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
590  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
591  const GO minAllGID = map.getMinAllGlobalIndex ();
592  const GO maxAllGID = map.getMaxAllGlobalIndex ();
593 
594  // The "Directory Map" (see below) will have a range of elements
595  // from the minimum to the maximum GID of the user Map, and a
596  // minimum GID of minAllGID from the user Map. It doesn't
597  // actually have to store all those entries, though do beware of
598  // calling getNodeElementList on it (see Bug 5822).
599  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
600 
601  // We can't afford to replicate the whole directory on each
602  // process, so create the "Directory Map", a uniform contiguous
603  // Map that describes how we will distribute the directory over
604  // processes.
605  //
606  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
607  // the index base. The index base should be separate from the
608  // minimum GID.
609  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
610  GloballyDistributed));
611  // The number of Directory elements that my process owns.
612  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
613 
614  // Fix for Bug 5822: If the input Map is "sparse," that is if
615  // the difference between the global min and global max GID is
616  // much larger than the global number of elements in the input
617  // Map, then it's possible that the Directory Map might have
618  // many more entries than the input Map on this process. This
619  // can cause memory scalability issues. In that case, we switch
620  // from the array-based implementation of Directory storage to
621  // the hash table - based implementation. We don't use hash
622  // tables all the time, because they are slower in the common
623  // case of a nonsparse Map.
624  //
625  // NOTE: This is a per-process decision. Some processes may use
626  // array-based storage, whereas others may use hash table -
627  // based storage.
628 
629  // A hash table takes a constant factor more space, more or
630  // less, than an array. Thus, it's not worthwhile, even in
631  // terms of memory usage, always to use a hash table.
632  // Furthermore, array lookups are faster than hash table
633  // lookups, so it may be worthwhile to use an array even if it
634  // takes more space. The "sparsity threshold" governs when to
635  // switch to a hash table - based implementation.
636  const size_t inverseSparsityThreshold = 10;
637  useHashTables_ =
638  (dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements());
639 
640  // Get list of process IDs that own the directory entries for the
641  // Map GIDs. These will be the targets of the sends that the
642  // Distributor will do.
643  const int myRank = comm->getRank ();
644  const size_t numMyEntries = map.getNodeNumElements ();
645  Array<int> sendImageIDs (numMyEntries);
646  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
647  // An ID not present in this lookup indicates that it lies outside
648  // of the range [minAllGID,maxAllGID] (from map_). this means
649  // something is wrong with map_, our fault.
650  const LookupStatus lookupStatus =
651  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
652  TEUCHOS_TEST_FOR_EXCEPTION(
653  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
654  << " constructor: the Directory Map could not find out where one or "
655  "more of my Map's indices should go. The input to getRemoteIndexList "
656  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
657  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
658  "the following entries on the calling process " <<
659  map.getComm ()->getRank () << ": " <<
660  Teuchos::toString (map.getNodeElementList ()) << ", and has "
661  << map.getGlobalNumElements () << " total global indices in ["
662  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
663  << "]. The Directory Map has "
664  << directoryMap_->getGlobalNumElements () << " total global indices in "
665  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
666  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
667  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
668  directoryMap_->getMaxGlobalIndex () << "]. "
669  "This probably means there is a bug in Map or Directory. "
670  "Please report this bug to the Tpetra developers.");
671 
672  // Initialize the distributor using the list of process IDs to
673  // which to send. We'll use the distributor to send out triples
674  // of (GID, process ID, LID). We're sending the entries to the
675  // processes that the Directory Map says should own them, which is
676  // why we called directoryMap_->getRemoteIndexList() above.
677  Distributor distor (comm);
678  const size_t numReceives = distor.createFromSends (sendImageIDs);
679 
680  // NOTE (mfh 21 Mar 2012) The following code assumes that
681  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
682  //
683  // Create and fill buffer of (GID, PID, LID) triples to send
684  // out. We pack the (GID, PID, LID) triples into a single Array
685  // of GO, casting the PID from int to GO and the LID from LO to
686  // GO as we do so.
687  //
688  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
689  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
690  // required, and the latter is generally the case, but we should
691  // still check for this.
692  const int packetSize = 3; // We're sending triples, so packet size is 3.
693  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
694  {
695  size_type exportIndex = 0;
696  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
697  exportEntries[exportIndex++] = myGlobalEntries[i];
698  exportEntries[exportIndex++] = as<GO> (myRank);
699  exportEntries[exportIndex++] = as<GO> (i);
700  }
701  }
702  // Buffer of data to receive. The Distributor figured out for
703  // us how many packets we're receiving, when we called its
704  // createFromSends() method to set up the distribution plan.
705  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
706 
707  // Distribute the triples of (GID, process ID, LID).
708  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
709 
710  // Unpack the redistributed data. Both implementations of
711  // Directory storage map from an LID in the Directory Map (which
712  // is the LID of the GID to store) to either a PID or an LID in
713  // the input Map. Each "packet" (contiguous chunk of
714  // importElements) contains a triple: (GID, PID, LID).
715  if (useHashTables_) {
716  // Create the hash tables. We know exactly how many elements
717  // to expect in each hash table. FixedHashTable's constructor
718  // currently requires all the keys and values at once, so we
719  // have to extract them in temporary arrays. It may be
720  // possible to rewrite FixedHashTable to use a "start fill" /
721  // "end fill" approach that avoids the temporary arrays, but
722  // we won't try that for now.
723 
724  // The constructors of Array and ArrayRCP that take a number
725  // of elements all initialize the arrays. Instead, allocate
726  // raw arrays, then hand them off to ArrayRCP, to avoid the
727  // initial unnecessary initialization without losing the
728  // benefit of exception safety (and bounds checking, in a
729  // debug build).
730  LO* tableKeysRaw = NULL;
731  LO* tableLidsRaw = NULL;
732  int* tablePidsRaw = NULL;
733  try {
734  tableKeysRaw = new LO [numReceives];
735  tableLidsRaw = new LO [numReceives];
736  tablePidsRaw = new int [numReceives];
737  } catch (...) {
738  if (tableKeysRaw != NULL) {
739  delete [] tableKeysRaw;
740  }
741  if (tableLidsRaw != NULL) {
742  delete [] tableLidsRaw;
743  }
744  if (tablePidsRaw != NULL) {
745  delete [] tablePidsRaw;
746  }
747  throw;
748  }
749  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
750  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
751  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
752 
753  if (tie_break.is_null ()) {
754  // Fill the temporary arrays of keys and values.
755  size_type importIndex = 0;
756  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
757  const GO curGID = importElements[importIndex++];
758  const LO curLID = directoryMap_->getLocalElement (curGID);
759  TEUCHOS_TEST_FOR_EXCEPTION(
760  curLID == LINVALID, std::logic_error,
761  Teuchos::typeName(*this) << " constructor: Incoming global index "
762  << curGID << " does not have a corresponding local index in the "
763  "Directory Map. Please report this bug to the Tpetra developers.");
764  tableKeys[i] = curLID;
765  tablePids[i] = importElements[importIndex++];
766  tableLids[i] = importElements[importIndex++];
767  }
768  // Set up the hash tables. The hash tables' constructor
769  // detects whether there are duplicates, so that we can set
770  // locallyOneToOne_.
771  typedef Kokkos::Device<typename NT::execution_space,
772  typename NT::memory_space> DT;
773  lidToPidTable_ =
774  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
775  tablePids ()));
776  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
777  lidToLidTable_ =
778  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
779  tableLids ()));
780  }
781  else { // tie_break is NOT null
782 
783  // For each directory Map LID received, collect all the
784  // corresponding (PID,LID) pairs. If the input Map is not
785  // one-to-one, corresponding directory Map LIDs will have
786  // more than one pair. In that case, we will use the
787  // TieBreak object to pick exactly one pair.
788  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
789  pair_table_type ownedPidLidPairs;
790 
791  // For each directory Map LID received, collect the zero or
792  // more input Map (PID,LID) pairs into ownedPidLidPairs.
793  size_type importIndex = 0;
794  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
795  const GO curGID = importElements[importIndex++];
796  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
797  TEUCHOS_TEST_FOR_EXCEPTION(
798  dirMapLid == LINVALID, std::logic_error,
799  Teuchos::typeName(*this) << " constructor: Incoming global index "
800  << curGID << " does not have a corresponding local index in the "
801  "Directory Map. Please report this bug to the Tpetra developers.");
802  tableKeys[i] = dirMapLid;
803  const int PID = importElements[importIndex++];
804  const int LID = importElements[importIndex++];
805 
806  // These may change below. We fill them in just to ensure
807  // that they won't have invalid values.
808  tablePids[i] = PID;
809  tableLids[i] = LID;
810 
811  // For every directory Map LID, we have to remember all
812  // (PID, LID) pairs. The TieBreak object will arbitrate
813  // between them in the loop below.
814  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
815  }
816 
817  // Use TieBreak to arbitrate between (PID,LID) pairs
818  // corresponding to each directory Map LID.
819  //
820  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
821  // as the directory Map LID?
822  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
823  // contiguous, but the directory map is. Need to set tablePids[i]
824  // and tableLids[i], so need to loop over numReceives (as that is
825  // how those arrays are allocated). FIXED
826 
827  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
828  const LO dirMapLid = tableKeys[i];
829  const std::vector<std::pair<int, LO> >& pidLidList =
830  ownedPidLidPairs[dirMapLid];
831  const size_t listLen = pidLidList.size();
832  if (listLen == 0) continue; // KDD This will never happen
833  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
834  if (listLen > 1) {
835  locallyOneToOne_ = false;
836  }
837  // If there is some (PID,LID) pair for the current input
838  // Map LID, then it makes sense to invoke the TieBreak
839  // object to arbitrate between the options. Even if
840  // there is only one (PID,LID) pair, we still want to
841  // give the TieBreak object a chance to do whatever it
842  // likes to do, in terms of side effects (e.g., track
843  // (PID,LID) pairs).
844  const size_type index =
845  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
846  pidLidList));
847  tablePids[i] = pidLidList[index].first;
848  tableLids[i] = pidLidList[index].second;
849  }
850 
851  // Set up the hash tables.
852  typedef Kokkos::Device<typename NT::execution_space,
853  typename NT::memory_space> DT;
854  lidToPidTable_ =
855  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
856  tablePids ()));
857  lidToLidTable_ =
858  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
859  tableLids ()));
860  }
861  }
862  else {
863  if (tie_break.is_null ()) {
864  // Use array-based implementation of Directory storage.
865  // Allocate these arrays and fill them with invalid values,
866  // in case the input Map's GID list is sparse (i.e., does
867  // not populate all GIDs from minAllGID to maxAllGID).
868  PIDs_ = arcp<int> (dir_numMyEntries);
869  std::fill (PIDs_.begin (), PIDs_.end (), -1);
870  LIDs_ = arcp<LO> (dir_numMyEntries);
871  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
872  // Fill in the arrays with PIDs resp. LIDs.
873  size_type importIndex = 0;
874  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
875  const GO curGID = importElements[importIndex++];
876  const LO curLID = directoryMap_->getLocalElement (curGID);
877  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
878  Teuchos::typeName(*this) << " constructor: Incoming global index "
879  << curGID << " does not have a corresponding local index in the "
880  "Directory Map. Please report this bug to the Tpetra developers.");
881 
882  // If PIDs_[curLID] is not -1, then curGID is a duplicate
883  // on the calling process, so the Directory is not locally
884  // one-to-one.
885  if (PIDs_[curLID] != -1) {
886  locallyOneToOne_ = false;
887  }
888  PIDs_[curLID] = importElements[importIndex++];
889  LIDs_[curLID] = importElements[importIndex++];
890  }
891  }
892  else {
893  PIDs_ = arcp<int> (dir_numMyEntries);
894  LIDs_ = arcp<LO> (dir_numMyEntries);
895  std::fill (PIDs_.begin (), PIDs_.end (), -1);
896 
897  // All received (PID, LID) pairs go into ownedPidLidPairs.
898  // This is a map from the directory Map's LID to the (PID,
899  // LID) pair (where the latter LID comes from the input Map,
900  // not the directory Map). If the input Map is not
901  // one-to-one, corresponding LIDs will have
902  // ownedPidLidPairs[curLID].size() > 1. In that case, we
903  // will use the TieBreak object to pick exactly one pair.
904  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
905  size_type importIndex = 0;
906  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
907  const GO GID = importElements[importIndex++];
908  const int PID = importElements[importIndex++];
909  const LO LID = importElements[importIndex++];
910 
911  const LO dirMapLid = directoryMap_->getLocalElement (GID);
912  TEUCHOS_TEST_FOR_EXCEPTION(
913  dirMapLid == LINVALID, std::logic_error,
914  Teuchos::typeName(*this) << " constructor: Incoming global index "
915  << GID << " does not have a corresponding local index in the "
916  "Directory Map. Please report this bug to the Tpetra developers.");
917  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
918  }
919 
920  // Use TieBreak to arbitrate between (PID,LID) pairs
921  // corresponding to each directory Map LID.
922  //
923  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
924  // as the directory Map LID?
925  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
926  // contiguous. Loop over all ownedPidLidPairs; skip those that have
927  // empty lists. FIXED
928 
929  for (size_t i = 0; i < dir_numMyEntries; ++i) {
930  const std::vector<std::pair<int, LO> >& pidLidList =
931  ownedPidLidPairs[i];
932  const size_t listLen = pidLidList.size();
933  if (listLen == 0) continue; // KDD will happen for GIDs not in
934  // KDD the user's source map
935  const LO dirMapLid = static_cast<LO> (i);
936  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
937  if (listLen > 1) {
938  locallyOneToOne_ = false;
939  }
940  // If there is some (PID,LID) pair for the current input
941  // Map LID, then it makes sense to invoke the TieBreak
942  // object to arbitrate between the options. Even if
943  // there is only one (PID,LID) pair, we still want to
944  // give the TieBreak object a chance to do whatever it
945  // likes to do, in terms of side effects (e.g., track
946  // (PID,LID) pairs).
947  const size_type index =
948  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
949  pidLidList));
950  PIDs_[i] = pidLidList[index].first;
951  LIDs_[i] = pidLidList[index].second;
952  }
953  }
954  }
955  }
956 
957  template<class LO, class GO, class NT>
958  std::string
960  {
961  std::ostringstream os;
962  os << "DistributedNoncontiguousDirectory"
963  << "<" << Teuchos::TypeNameTraits<LO>::name ()
964  << ", " << Teuchos::TypeNameTraits<GO>::name ()
965  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
966  return os.str ();
967  }
968 
969  template<class LO, class GO, class NT>
972  getEntriesImpl (const map_type& map,
973  const Teuchos::ArrayView<const GO> &globalIDs,
974  const Teuchos::ArrayView<int> &nodeIDs,
975  const Teuchos::ArrayView<LO> &localIDs,
976  const bool computeLIDs) const
977  {
978  using Teuchos::Array;
979  using Teuchos::ArrayRCP;
980  using Teuchos::ArrayView;
981  using Teuchos::as;
982  using Teuchos::RCP;
983  using Teuchos::toString;
984  using Details::Behavior;
986  using std::cerr;
987  using std::endl;
988  using size_type = typename Array<GO>::size_type;
989  const char funcPrefix[] = "Tpetra::"
990  "DistributedNoncontiguousDirectory::getEntriesImpl: ";
991  const char errSuffix[] =
992  " Please report this bug to the Tpetra developers.";
993 
994  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
995  const bool verbose = Behavior::verbose ("Directory") ||
996  Behavior::verbose ("Tpetra::Directory");
997  const size_t maxNumToPrint = verbose ?
999 
1000  std::unique_ptr<std::string> procPrefix;
1001  if (verbose) {
1002  std::ostringstream os;
1003  os << "Proc ";
1004  if (map.getComm ().is_null ()) {
1005  os << "?";
1006  }
1007  else {
1008  os << map.getComm ()->getRank ();
1009  }
1010  os << ": ";
1011  procPrefix = std::unique_ptr<std::string>(
1012  new std::string(os.str()));
1013  os << funcPrefix << "{";
1014  verbosePrintArray(os, globalIDs, "GIDs", maxNumToPrint);
1015  os << ", ";
1016  verbosePrintArray(os, nodeIDs, "PIDs", maxNumToPrint);
1017  os << ", ";
1018  verbosePrintArray(os, localIDs, "LIDs", maxNumToPrint);
1019  os << ", computeLIDs: "
1020  << (computeLIDs ? "true" : "false") << "}" << endl;
1021  cerr << os.str ();
1022  }
1023 
1024  const size_t numEntries = globalIDs.size ();
1025  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
1027 
1028  //
1029  // Set up directory structure.
1030  //
1031 
1032  // If we're computing LIDs, we also have to include them in each
1033  // packet, along with the GID and process ID.
1034  const int packetSize = computeLIDs ? 3 : 2;
1035 
1036  // For data distribution, we use: Surprise! A Distributor!
1037  Distributor distor (comm);
1038 
1039  // Get directory locations for the requested list of entries.
1040  Array<int> dirImages (numEntries);
1041  if (verbose) {
1042  std::ostringstream os;
1043  os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
1044  << endl;
1045  cerr << os.str ();
1046  }
1047  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1048  if (verbose) {
1049  std::ostringstream os;
1050  os << *procPrefix << "Director Map getRemoteIndexList out ";
1051  verbosePrintArray(os, dirImages, "PIDs", maxNumToPrint);
1052  os << endl;
1053  cerr << os.str ();
1054  }
1055 
1056  // Check for unfound globalIDs and set corresponding nodeIDs to -1
1057  size_t numMissing = 0;
1058  if (res == IDNotPresent) {
1059  for (size_t i=0; i < numEntries; ++i) {
1060  if (dirImages[i] == -1) {
1061  nodeIDs[i] = -1;
1062  if (computeLIDs) {
1063  localIDs[i] = LINVALID;
1064  }
1065  numMissing++;
1066  }
1067  }
1068  }
1069 
1070  Array<GO> sendGIDs;
1071  Array<int> sendImages;
1072  if (verbose) {
1073  std::ostringstream os;
1074  os << *procPrefix << "Call Distributor::createFromRecvs"
1075  << endl;
1076  cerr << os.str ();
1077  }
1078  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1079  if (verbose) {
1080  std::ostringstream os;
1081  os << *procPrefix << "Distributor::createFromRecvs result: "
1082  << "{";
1083  verbosePrintArray(os, sendGIDs, "sendGIDs", maxNumToPrint);
1084  os << ", ";
1085  verbosePrintArray(os, sendImages, "sendPIDs", maxNumToPrint);
1086  os << "}" << endl;
1087  cerr << os.str ();
1088  }
1089  const size_type numSends = sendGIDs.size ();
1090 
1091  //
1092  // mfh 13 Nov 2012:
1093  //
1094  // The code below temporarily stores LO, GO, and int values in
1095  // an array of global_size_t. If one of the signed types (LO
1096  // and GO should both be signed) happened to be -1 (or some
1097  // negative number, but -1 is the one that came up today), then
1098  // conversion to global_size_t will result in a huge
1099  // global_size_t value, and thus conversion back may overflow.
1100  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1101  // all along.)
1102  //
1103  // The overflow normally would not be a problem, since it would
1104  // just go back to -1 again. However, Teuchos::as does range
1105  // checking on conversions in a debug build, so it throws an
1106  // exception (std::range_error) in this case. Range checking is
1107  // generally useful in debug mode, so we don't want to disable
1108  // this behavior globally.
1109  //
1110  // We solve this problem by forgoing use of Teuchos::as for the
1111  // conversions below from LO, GO, or int to global_size_t, and
1112  // the later conversions back from global_size_t to LO, GO, or
1113  // int.
1114  //
1115  // I've recorded this discussion as Bug 5760.
1116  //
1117 
1118  // global_size_t >= GO
1119  // global_size_t >= size_t >= int
1120  // global_size_t >= size_t >= LO
1121  // Therefore, we can safely store all of these in a global_size_t
1122  Array<global_size_t> exports (packetSize * numSends);
1123  {
1124  // Packet format:
1125  // - If computing LIDs: (GID, PID, LID)
1126  // - Otherwise: (GID, PID)
1127  //
1128  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1129 
1130  // Current position to which to write in exports array. If
1131  // sending pairs, we pack the (GID, PID) pair for gid =
1132  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1133  // triples, we pack the (GID, PID, LID) pair for gid =
1134  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1135  size_type exportsIndex = 0;
1136 
1137  if (useHashTables_) {
1138  if (verbose) {
1139  std::ostringstream os;
1140  os << *procPrefix << "Pack exports (useHashTables_ true)"
1141  << endl;
1142  cerr << os.str ();
1143  }
1144  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1145  const GO curGID = sendGIDs[gidIndex];
1146  // Don't use as() here (see above note).
1147  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1148  const LO curLID = directoryMap_->getLocalElement (curGID);
1149  TEUCHOS_TEST_FOR_EXCEPTION
1150  (curLID == LINVALID, std::logic_error, funcPrefix <<
1151  "Directory Map's global index " << curGID << " lacks "
1152  "a corresponding local index." << errSuffix);
1153  // Don't use as() here (see above note).
1154  exports[exportsIndex++] =
1155  static_cast<global_size_t> (lidToPidTable_->get (curLID));
1156  if (computeLIDs) {
1157  // Don't use as() here (see above note).
1158  exports[exportsIndex++] =
1159  static_cast<global_size_t> (lidToLidTable_->get (curLID));
1160  }
1161  }
1162  }
1163  else {
1164  if (verbose) {
1165  std::ostringstream os;
1166  os << *procPrefix << "Pack exports (useHashTables_ false)"
1167  << endl;
1168  cerr << os.str ();
1169  }
1170  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1171  const GO curGID = sendGIDs[gidIndex];
1172  // Don't use as() here (see above note).
1173  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1174  const LO curLID = directoryMap_->getLocalElement (curGID);
1175  TEUCHOS_TEST_FOR_EXCEPTION
1176  (curLID == LINVALID, std::logic_error, funcPrefix <<
1177  "Directory Map's global index " << curGID << " lacks "
1178  "a corresponding local index." << errSuffix);
1179  // Don't use as() here (see above note).
1180  exports[exportsIndex++] =
1181  static_cast<global_size_t> (PIDs_[curLID]);
1182  if (computeLIDs) {
1183  // Don't use as() here (see above note).
1184  exports[exportsIndex++] =
1185  static_cast<global_size_t> (LIDs_[curLID]);
1186  }
1187  }
1188  }
1189 
1190  TEUCHOS_TEST_FOR_EXCEPTION
1191  (exportsIndex > exports.size (), std::logic_error,
1192  funcPrefix << "On Process " << comm->getRank () << ", "
1193  "exportsIndex = " << exportsIndex << " > exports.size() = "
1194  << exports.size () << "." << errSuffix);
1195  }
1196 
1197  TEUCHOS_TEST_FOR_EXCEPTION
1198  (numEntries < numMissing, std::logic_error, funcPrefix <<
1199  "On Process " << comm->getRank () << ", numEntries = " <<
1200  numEntries << " < numMissing = " << numMissing << "." <<
1201  errSuffix);
1202 
1203  //
1204  // mfh 13 Nov 2012: See note above on conversions between
1205  // global_size_t and LO, GO, or int.
1206  //
1207  const size_t numRecv = numEntries - numMissing;
1208 
1209  {
1210  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1211  const size_t requiredImportLen = numRecv * packetSize;
1212  const int myRank = comm->getRank ();
1213  TEUCHOS_TEST_FOR_EXCEPTION(
1214  importLen < requiredImportLen, std::logic_error,
1215  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1216  "On Process " << myRank << ": The 'imports' array must have length "
1217  "at least " << requiredImportLen << ", but its actual length is " <<
1218  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1219  packetSize << ", numEntries (# GIDs): " << numEntries <<
1220  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1221  << distor.getTotalReceiveLength () << ". " << std::endl <<
1222  "Distributor description: " << distor.description () << ". "
1223  << std::endl <<
1224  "Please report this bug to the Tpetra developers.");
1225  }
1226 
1227  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1228  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1229  // with communication, by splitting this call into doPosts and
1230  // doWaits. The code is still correct in this form, however.
1231  if (verbose) {
1232  std::ostringstream os;
1233  os << *procPrefix << "Call doPostsAndWaits: {"
1234  << "packetSize: " << packetSize << ", ";
1235  verbosePrintArray(os, exports, "exports", maxNumToPrint);
1236  os << "}" << endl;
1237  cerr << os.str ();
1238  }
1239  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1240  if (verbose) {
1241  std::ostringstream os;
1242  os << *procPrefix << "doPostsAndWaits result: ";
1243  verbosePrintArray(os, imports, "imports", maxNumToPrint);
1244  os << endl;
1245  cerr << os.str ();
1246  }
1247 
1248  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1249  Array<GO> offset (numEntries); // permutation array (sort2 output)
1250  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1251  offset[ii] = ii;
1252  }
1253  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1254  if (verbose) {
1255  std::ostringstream os;
1256  os << *procPrefix;
1257  verbosePrintArray(os, sortedIDs, "sortedIDs", maxNumToPrint);
1258  os << ", ";
1259  verbosePrintArray(os, offset, "offset", maxNumToPrint);
1260  os << endl;
1261  cerr << os.str ();
1262  }
1263 
1264  size_t importsIndex = 0;
1265 
1266  // we know these conversions are in range, because we loaded this data
1267  //
1268  // Don't use as() for conversions here; we know they are in range.
1269  for (size_t i = 0; i < numRecv; ++i) {
1270  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1271  auto p1 = std::equal_range (sortedIDs.begin(),
1272  sortedIDs.end(), curGID);
1273  if (p1.first != p1.second) {
1274  const size_t j = p1.first - sortedIDs.begin();
1275  nodeIDs[offset[j]] =
1276  static_cast<int> (imports[importsIndex++]);
1277  if (computeLIDs) {
1278  localIDs[offset[j]] =
1279  static_cast<LO> (imports[importsIndex++]);
1280  }
1281  if (nodeIDs[offset[j]] == -1) {
1282  res = IDNotPresent;
1283  }
1284  }
1285  }
1286 
1287  TEUCHOS_TEST_FOR_EXCEPTION
1288  (size_t (importsIndex) > size_t (imports.size ()),
1289  std::logic_error, funcPrefix << "On Process " <<
1290  comm->getRank () << ": importsIndex = " << importsIndex
1291  << " > imports.size() = " << imports.size () << ". "
1292  "numRecv: " << numRecv
1293  << ", packetSize: " << packetSize << ", "
1294  "numEntries (# GIDs): " << numEntries
1295  << ", numMissing: " << numMissing
1296  << ": distor.getTotalReceiveLength(): "
1297  << distor.getTotalReceiveLength () << "." << errSuffix);
1298  if (verbose) {
1299  std::ostringstream os;
1300  os << *procPrefix << funcPrefix << "Done!" << endl;
1301  cerr << os.str ();
1302  }
1303  return res;
1304  }
1305 
1306 
1307  template<class LO, class GO, class NT>
1308  bool
1310  isOneToOne (const Teuchos::Comm<int>& comm) const
1311  {
1312  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1313  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1314  int gbl121 = 0;
1315  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1316  Teuchos::outArg (gbl121));
1317  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1318  }
1319  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1320  }
1321  } // namespace Details
1322 } // namespace Tpetra
1323 
1324 //
1325 // Explicit instantiation macro
1326 //
1327 // Must be expanded from within the Tpetra namespace!
1328 //
1329 #define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1330  namespace Details { \
1331  template class Directory< LO , GO , NODE >; \
1332  template class ReplicatedDirectory< LO , GO , NODE >; \
1333  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1334  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1335  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1336  }
1337 
1338 #endif // TPETRA_DIRECTORYIMPL_DEF_HPP
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
void initialize(int *argc, char ***argv)
Initialize Tpetra.
std::string description() const override
A one-line human-readable description of this object.
Interface for breaking ties in ownership.
std::string description() const
Return a one-line description of this object.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
Implementation of Directory for a distributed contiguous Map.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A parallel distribution of indices over processes.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Stand-alone utility functions and macros.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Implementation of Directory for a contiguous, uniformly distributed Map.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
std::string description() const override
A one-line human-readable description of this object.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.