Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef __Tpetra_DirectoryImpl_def_hpp
43 #define __Tpetra_DirectoryImpl_def_hpp
44 
47 
48 #include "Tpetra_Distributor.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Tpetra_TieBreak.hpp"
51 #include "Tpetra_Details_FixedHashTable.hpp"
52 #include "Teuchos_Comm.hpp"
53 #include <memory>
54 #include <sstream>
55 
56 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
57 #ifdef HAVE_TPETRACORE_MPI
58 # include <mpi.h>
59 #endif // HAVE_TPETRACORE_MPI
60 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
61 
62 namespace Tpetra {
63  namespace Details {
64  template<class LO, class GO, class NT>
67  getEntries (const map_type& map,
68  const Teuchos::ArrayView<const GO> &globalIDs,
69  const Teuchos::ArrayView<int> &nodeIDs,
70  const Teuchos::ArrayView<LO> &localIDs,
71  const bool computeLIDs) const
72  {
73  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
74  // all have the same size, before modifying any output arguments.
75  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
76  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
77  "Output arrays do not have the right sizes. nodeIDs.size() = "
78  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
79  << ".");
80  TEUCHOS_TEST_FOR_EXCEPTION(
81  computeLIDs && localIDs.size() != globalIDs.size(),
82  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
83  "Output array do not have the right sizes. localIDs.size() = "
84  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
85  << ".");
86 
87  // Initially, fill nodeIDs and localIDs (if applicable) with
88  // invalid values. The "invalid" process ID is -1 (this means
89  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
90  // "invalid" process ID); the invalid local ID comes from
91  // OrdinalTraits.
92  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
93  if (computeLIDs) {
94  std::fill (localIDs.begin(), localIDs.end(),
95  Teuchos::OrdinalTraits<LO>::invalid ());
96  }
97  // Actually do the work.
98  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
99  }
100 
101 
102  template<class LO, class GO, class NT>
105  numProcs_ (map.getComm ()->getSize ())
106  {}
107 
108 
109  template<class LO, class GO, class NT>
110  bool
112  isOneToOne (const Teuchos::Comm<int>& /* comm */) const
113  {
114  // A locally replicated Map is one-to-one only if there is no
115  // replication, that is, only if the Map's communicator only has
116  // one process.
117  return (numProcs_ == 1);
118  }
119 
120 
121  template<class LO, class GO, class NT>
122  std::string
124  {
125  std::ostringstream os;
126  os << "ReplicatedDirectory"
127  << "<" << Teuchos::TypeNameTraits<LO>::name ()
128  << ", " << Teuchos::TypeNameTraits<GO>::name ()
129  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
130  return os.str ();
131  }
132 
133 
134  template<class LO, class GO, class NT>
136  ContiguousUniformDirectory (const map_type& map)
137  {
138  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
139  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
140  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
141  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
142  }
143 
144 
145  template<class LO, class GO, class NT>
146  std::string
148  {
149  std::ostringstream os;
150  os << "ContiguousUniformDirectory"
151  << "<" << Teuchos::TypeNameTraits<LO>::name ()
152  << ", " << Teuchos::TypeNameTraits<GO>::name ()
153  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
154  return os.str ();
155  }
156 
157 
158  template<class LO, class GO, class NT>
162  const Teuchos::ArrayView<const GO> &globalIDs,
163  const Teuchos::ArrayView<int> &nodeIDs,
164  const Teuchos::ArrayView<LO> &localIDs,
165  const bool computeLIDs) const
166  {
167  using Teuchos::Comm;
168  using Teuchos::RCP;
169  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
170  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
172 
173  RCP<const Comm<int> > comm = map.getComm ();
174  const GO g_min = map.getMinAllGlobalIndex ();
175 
176  // Let N_G be the global number of elements in the Map,
177  // and P be the number of processes in its communicator.
178  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
179  //
180  // The first R processes own N_L+1 elements.
181  // The remaining P-R processes own N_L elements.
182  //
183  // Let g be the current GID, g_min be the global minimum GID,
184  // and g_0 = g - g_min. If g is a valid GID in this Map, then
185  // g_0 is in [0, N_G - 1].
186  //
187  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
188  // the rank of the process that owns g is floor(g_0 / (N_L +
189  // 1)), and its corresponding local index on that process is g_0
190  // mod (N_L + 1).
191  //
192  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
193  // and g_0 >= R*(N_L + 1), then the rank of the process that
194  // owns g is then R + floor(g_R / N_L), and its corresponding
195  // local index on that process is g_R mod N_L.
196 
197  const size_type N_G =
198  static_cast<size_type> (map.getGlobalNumElements ());
199  const size_type P = static_cast<size_type> (comm->getSize ());
200  const size_type N_L = N_G / P;
201  const size_type R = N_G - N_L * P; // N_G mod P
202  const size_type N_R = R * (N_L + static_cast<size_type> (1));
203 
204 #ifdef HAVE_TPETRA_DEBUG
205  TEUCHOS_TEST_FOR_EXCEPTION(
206  N_G != P*N_L + R, std::logic_error,
207  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
208  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
209  << " = " << P*N_L + R << ". "
210  "Please report this bug to the Tpetra developers.");
211 #endif // HAVE_TPETRA_DEBUG
212 
213  const size_type numGids = globalIDs.size (); // for const loop bound
214  // Avoid signed/unsigned comparisons below, in case GO is
215  // unsigned. (Integer literals are generally signed.)
216  const GO ONE = static_cast<GO> (1);
217 
218  if (computeLIDs) {
219  for (size_type k = 0; k < numGids; ++k) {
220  const GO g_0 = globalIDs[k] - g_min;
221 
222  // The first test is a little strange just in case GO is
223  // unsigned. Compilers raise a warning on tests like "x <
224  // 0" if x is unsigned, but don't usually raise a warning if
225  // the expression is a bit more complicated than that.
226  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
227  nodeIDs[k] = -1;
228  localIDs[k] = invalidLid;
229  res = IDNotPresent;
230  }
231  else if (g_0 < static_cast<GO> (N_R)) {
232  // The GID comes from the initial sequence of R processes.
233  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
234  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
235  }
236  else if (g_0 >= static_cast<GO> (N_R)) {
237  // The GID comes from the remaining P-R processes.
238  const GO g_R = g_0 - static_cast<GO> (N_R);
239  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
240  localIDs[k] = static_cast<int> (g_R % N_L);
241  }
242 #ifdef HAVE_TPETRA_DEBUG
243  else {
244  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
245  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
246  "should never get here. "
247  "Please report this bug to the Tpetra developers.");
248  }
249 #endif // HAVE_TPETRA_DEBUG
250  }
251  }
252  else { // don't compute local indices
253  for (size_type k = 0; k < numGids; ++k) {
254  const GO g_0 = globalIDs[k] - g_min;
255  // The first test is a little strange just in case GO is
256  // unsigned. Compilers raise a warning on tests like "x <
257  // 0" if x is unsigned, but don't usually raise a warning if
258  // the expression is a bit more complicated than that.
259  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
260  nodeIDs[k] = -1;
261  res = IDNotPresent;
262  }
263  else if (g_0 < static_cast<GO> (N_R)) {
264  // The GID comes from the initial sequence of R processes.
265  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
266  }
267  else if (g_0 >= static_cast<GO> (N_R)) {
268  // The GID comes from the remaining P-R processes.
269  const GO g_R = g_0 - static_cast<GO> (N_R);
270  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
271  }
272 #ifdef HAVE_TPETRA_DEBUG
273  else {
274  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
275  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
276  "should never get here. "
277  "Please report this bug to the Tpetra developers.");
278  }
279 #endif // HAVE_TPETRA_DEBUG
280  }
281  }
282  return res;
283  }
284 
285  template<class LO, class GO, class NT>
287  DistributedContiguousDirectory (const map_type& map)
288  {
289  using Teuchos::arcp;
290  using Teuchos::gatherAll;
291  using Teuchos::RCP;
292 
293  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
294 
295  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
296  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
297  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
298  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
299 
300  const int numProcs = comm->getSize ();
301 
302  // Make room for the min global ID on each process, plus one
303  // entry at the end for the "max cap."
304  allMinGIDs_ = arcp<GO> (numProcs + 1);
305  // Get my process' min global ID.
306  GO minMyGID = map.getMinGlobalIndex ();
307  // Gather all of the min global IDs into the first numProcs
308  // entries of allMinGIDs_.
309 
310  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
311  //
312  // The purpose of this giant hack is that gatherAll appears to
313  // interpret the "receive count" argument differently than
314  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
315  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
316  // which could relate either to this, or to OpenMPI.
317 #ifdef HAVE_TPETRACORE_MPI
318  MPI_Datatype rawMpiType = MPI_INT;
319  bool useRawMpi = true;
320  if (typeid (GO) == typeid (int)) {
321  rawMpiType = MPI_INT;
322  } else if (typeid (GO) == typeid (long)) {
323  rawMpiType = MPI_LONG;
324  } else {
325  useRawMpi = false;
326  }
327  if (useRawMpi) {
328  using Teuchos::rcp_dynamic_cast;
329  using Teuchos::MpiComm;
330  RCP<const MpiComm<int> > mpiComm =
331  rcp_dynamic_cast<const MpiComm<int> > (comm);
332  // It could be a SerialComm instead, even in an MPI build, so
333  // be sure to check.
334  if (! comm.is_null ()) {
335  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
336  const int err =
337  MPI_Allgather (&minMyGID, 1, rawMpiType,
338  allMinGIDs_.getRawPtr (), 1, rawMpiType,
339  rawMpiComm);
340  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
341  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
342  } else {
343  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
344  }
345  } else {
346  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
347  }
348 #else // NOT HAVE_TPETRACORE_MPI
349  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
350 #endif // HAVE_TPETRACORE_MPI
351  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
352 
353  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
354 
355  // Put the max cap at the end. Adding one lets us write loops
356  // over the global IDs with the usual strict less-than bound.
357  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
358  + Teuchos::OrdinalTraits<GO>::one ();
359  }
360 
361  template<class LO, class GO, class NT>
362  std::string
364  {
365  std::ostringstream os;
366  os << "DistributedContiguousDirectory"
367  << "<" << Teuchos::TypeNameTraits<LO>::name ()
368  << ", " << Teuchos::TypeNameTraits<GO>::name ()
369  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
370  return os.str ();
371  }
372 
373  template<class LO, class GO, class NT>
377  const Teuchos::ArrayView<const GO> &globalIDs,
378  const Teuchos::ArrayView<int> &nodeIDs,
379  const Teuchos::ArrayView<LO> &localIDs,
380  const bool computeLIDs) const
381  {
382  using Teuchos::Array;
383  using Teuchos::ArrayRCP;
384  using Teuchos::ArrayView;
385  using Teuchos::as;
386  using Teuchos::Comm;
387  using Teuchos::RCP;
388 
390  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
391  const int myRank = comm->getRank ();
392 
393  // Map is on one process or is locally replicated.
394  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
395  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
396  typename ArrayView<const GO>::iterator gidIter;
397  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
398  if (map.isNodeGlobalElement (*gidIter)) {
399  *procIter++ = myRank;
400  if (computeLIDs) {
401  *lidIter++ = map.getLocalElement (*gidIter);
402  }
403  }
404  else {
405  // Advance the pointers, leaving these values set to invalid
406  procIter++;
407  if (computeLIDs) {
408  lidIter++;
409  }
410  res = IDNotPresent;
411  }
412  }
413  return res;
414  }
415 
416  template<class LO, class GO, class NT>
420  const Teuchos::ArrayView<const GO> &globalIDs,
421  const Teuchos::ArrayView<int> &nodeIDs,
422  const Teuchos::ArrayView<LO> &localIDs,
423  const bool computeLIDs) const
424  {
425  using Teuchos::Array;
426  using Teuchos::ArrayRCP;
427  using Teuchos::ArrayView;
428  using Teuchos::as;
429  using Teuchos::Comm;
430  using Teuchos::RCP;
431 
432  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
433  const int numProcs = comm->getSize ();
434  const global_size_t nOverP = map.getGlobalNumElements () / numProcs;
435  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
437 
438  // Map is distributed but contiguous.
439  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
440  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
441  typename ArrayView<const GO>::iterator gidIter;
442  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
443  LO LID = LINVALID; // Assume not found until proven otherwise
444  int image = -1;
445  GO GID = *gidIter;
446  // Guess uniform distribution and start a little above it
447  // TODO: replace by a binary search
448  int curRank;
449  { // We go through all this trouble to avoid overflow and
450  // signed / unsigned casting mistakes (that were made in
451  // previous versions of this code).
452  const GO one = as<GO> (1);
453  const GO two = as<GO> (2);
454  const GO nOverP_GID = as<GO> (nOverP);
455  const GO lowerBound = GID / std::max(nOverP_GID, one) + two;
456  curRank = as<int>(std::min(lowerBound, as<GO>(numProcs - 1)));
457  }
458  bool found = false;
459  while (curRank >= 0 && curRank < numProcs) {
460  if (allMinGIDs_[curRank] <= GID) {
461  if (GID < allMinGIDs_[curRank + 1]) {
462  found = true;
463  break;
464  }
465  else {
466  curRank++;
467  }
468  }
469  else {
470  curRank--;
471  }
472  }
473  if (found) {
474  image = curRank;
475  LID = as<LO> (GID - allMinGIDs_[image]);
476  }
477  else {
478  res = IDNotPresent;
479  }
480  *procIter++ = image;
481  if (computeLIDs) {
482  *lidIter++ = LID;
483  }
484  }
485  return res;
486  }
487 
488  template<class LO, class GO, class NT>
490  DistributedNoncontiguousDirectory (const map_type& map) :
491  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
492  locallyOneToOne_ (true), // to be revised below
493  useHashTables_ (false) // to be revised below
494  {
495  initialize (map, Teuchos::null);
496  }
497 
498  template<class LO, class GO, class NT>
499  DistributedNoncontiguousDirectory<LO, GO, NT>::
500  DistributedNoncontiguousDirectory (const map_type& map,
501  const tie_break_type& tie_break) :
502  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
503  locallyOneToOne_ (true), // to be revised below
504  useHashTables_ (false) // to be revised below
505  {
506  initialize (map, Teuchos::ptrFromRef (tie_break));
507  }
508 
509  template<class LO, class GO, class NT>
510  void
511  DistributedNoncontiguousDirectory<LO, GO, NT>::
512  initialize (const map_type& map,
513  Teuchos::Ptr<const tie_break_type> tie_break)
514  {
515  using Teuchos::arcp;
516  using Teuchos::Array;
517  using Teuchos::ArrayRCP;
518  using Teuchos::ArrayView;
519  using Teuchos::as;
520  using Teuchos::RCP;
521  using Teuchos::rcp;
522  using Teuchos::typeName;
523  using Teuchos::TypeNameTraits;
524  using std::cerr;
525  using std::endl;
526  typedef Array<int>::size_type size_type;
527 
528  // This class' implementation of getEntriesImpl() currently
529  // encodes the following assumptions:
530  //
531  // 1. global_size_t >= GO
532  // 2. global_size_t >= int
533  // 3. global_size_t >= LO
534  //
535  // We check these assumptions here.
536  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
537  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
538  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
539  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
540  << ".");
541  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
542  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
543  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
544  << sizeof(int) << ".");
545  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
546  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
547  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
548  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
549  << ".");
550 
551  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
552  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
553  const GO minAllGID = map.getMinAllGlobalIndex ();
554  const GO maxAllGID = map.getMaxAllGlobalIndex ();
555 
556  // The "Directory Map" (see below) will have a range of elements
557  // from the minimum to the maximum GID of the user Map, and a
558  // minimum GID of minAllGID from the user Map. It doesn't
559  // actually have to store all those entries, though do beware of
560  // calling getNodeElementList on it (see Bug 5822).
561  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
562 
563  // We can't afford to replicate the whole directory on each
564  // process, so create the "Directory Map", a uniform contiguous
565  // Map that describes how we will distribute the directory over
566  // processes.
567  //
568  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
569  // the index base. The index base should be separate from the
570  // minimum GID.
571  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
572  GloballyDistributed));
573  // The number of Directory elements that my process owns.
574  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
575 
576  // Fix for Bug 5822: If the input Map is "sparse," that is if
577  // the difference between the global min and global max GID is
578  // much larger than the global number of elements in the input
579  // Map, then it's possible that the Directory Map might have
580  // many more entries than the input Map on this process. This
581  // can cause memory scalability issues. In that case, we switch
582  // from the array-based implementation of Directory storage to
583  // the hash table - based implementation. We don't use hash
584  // tables all the time, because they are slower in the common
585  // case of a nonsparse Map.
586  //
587  // NOTE: This is a per-process decision. Some processes may use
588  // array-based storage, whereas others may use hash table -
589  // based storage.
590 
591  // A hash table takes a constant factor more space, more or
592  // less, than an array. Thus, it's not worthwhile, even in
593  // terms of memory usage, always to use a hash table.
594  // Furthermore, array lookups are faster than hash table
595  // lookups, so it may be worthwhile to use an array even if it
596  // takes more space. The "sparsity threshold" governs when to
597  // switch to a hash table - based implementation.
598  const size_t inverseSparsityThreshold = 10;
599  useHashTables_ =
600  (dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements());
601 
602  // Get list of process IDs that own the directory entries for the
603  // Map GIDs. These will be the targets of the sends that the
604  // Distributor will do.
605  const int myRank = comm->getRank ();
606  const size_t numMyEntries = map.getNodeNumElements ();
607  Array<int> sendImageIDs (numMyEntries);
608  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
609  // An ID not present in this lookup indicates that it lies outside
610  // of the range [minAllGID,maxAllGID] (from map_). this means
611  // something is wrong with map_, our fault.
612  const LookupStatus lookupStatus =
613  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
614  TEUCHOS_TEST_FOR_EXCEPTION(
615  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
616  << " constructor: the Directory Map could not find out where one or "
617  "more of my Map's indices should go. The input to getRemoteIndexList "
618  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
619  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
620  "the following entries on the calling process " <<
621  map.getComm ()->getRank () << ": " <<
622  Teuchos::toString (map.getNodeElementList ()) << ", and has "
623  << map.getGlobalNumElements () << " total global indices in ["
624  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
625  << "]. The Directory Map has "
626  << directoryMap_->getGlobalNumElements () << " total global indices in "
627  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
628  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
629  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
630  directoryMap_->getMaxGlobalIndex () << "]. "
631  "This probably means there is a bug in Map or Directory. "
632  "Please report this bug to the Tpetra developers.");
633 
634  // Initialize the distributor using the list of process IDs to
635  // which to send. We'll use the distributor to send out triples
636  // of (GID, process ID, LID). We're sending the entries to the
637  // processes that the Directory Map says should own them, which is
638  // why we called directoryMap_->getRemoteIndexList() above.
639  Distributor distor (comm);
640  const size_t numReceives = distor.createFromSends (sendImageIDs);
641 
642  // NOTE (mfh 21 Mar 2012) The following code assumes that
643  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
644  //
645  // Create and fill buffer of (GID, PID, LID) triples to send
646  // out. We pack the (GID, PID, LID) triples into a single Array
647  // of GO, casting the PID from int to GO and the LID from LO to
648  // GO as we do so.
649  //
650  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
651  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
652  // required, and the latter is generally the case, but we should
653  // still check for this.
654  const int packetSize = 3; // We're sending triples, so packet size is 3.
655  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
656  {
657  size_type exportIndex = 0;
658  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
659  exportEntries[exportIndex++] = myGlobalEntries[i];
660  exportEntries[exportIndex++] = as<GO> (myRank);
661  exportEntries[exportIndex++] = as<GO> (i);
662  }
663  }
664  // Buffer of data to receive. The Distributor figured out for
665  // us how many packets we're receiving, when we called its
666  // createFromSends() method to set up the distribution plan.
667  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
668 
669  // Distribute the triples of (GID, process ID, LID).
670  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
671 
672  // Unpack the redistributed data. Both implementations of
673  // Directory storage map from an LID in the Directory Map (which
674  // is the LID of the GID to store) to either a PID or an LID in
675  // the input Map. Each "packet" (contiguous chunk of
676  // importElements) contains a triple: (GID, PID, LID).
677  if (useHashTables_) {
678  // Create the hash tables. We know exactly how many elements
679  // to expect in each hash table. FixedHashTable's constructor
680  // currently requires all the keys and values at once, so we
681  // have to extract them in temporary arrays. It may be
682  // possible to rewrite FixedHashTable to use a "start fill" /
683  // "end fill" approach that avoids the temporary arrays, but
684  // we won't try that for now.
685 
686  // The constructors of Array and ArrayRCP that take a number
687  // of elements all initialize the arrays. Instead, allocate
688  // raw arrays, then hand them off to ArrayRCP, to avoid the
689  // initial unnecessary initialization without losing the
690  // benefit of exception safety (and bounds checking, in a
691  // debug build).
692  LO* tableKeysRaw = NULL;
693  LO* tableLidsRaw = NULL;
694  int* tablePidsRaw = NULL;
695  try {
696  tableKeysRaw = new LO [numReceives];
697  tableLidsRaw = new LO [numReceives];
698  tablePidsRaw = new int [numReceives];
699  } catch (...) {
700  if (tableKeysRaw != NULL) {
701  delete [] tableKeysRaw;
702  }
703  if (tableLidsRaw != NULL) {
704  delete [] tableLidsRaw;
705  }
706  if (tablePidsRaw != NULL) {
707  delete [] tablePidsRaw;
708  }
709  throw;
710  }
711  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
712  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
713  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
714 
715  if (tie_break.is_null ()) {
716  // Fill the temporary arrays of keys and values.
717  size_type importIndex = 0;
718  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
719  const GO curGID = importElements[importIndex++];
720  const LO curLID = directoryMap_->getLocalElement (curGID);
721  TEUCHOS_TEST_FOR_EXCEPTION(
722  curLID == LINVALID, std::logic_error,
723  Teuchos::typeName(*this) << " constructor: Incoming global index "
724  << curGID << " does not have a corresponding local index in the "
725  "Directory Map. Please report this bug to the Tpetra developers.");
726  tableKeys[i] = curLID;
727  tablePids[i] = importElements[importIndex++];
728  tableLids[i] = importElements[importIndex++];
729  }
730  // Set up the hash tables. The hash tables' constructor
731  // detects whether there are duplicates, so that we can set
732  // locallyOneToOne_.
733  typedef Kokkos::Device<typename NT::execution_space,
734  typename NT::memory_space> DT;
735  lidToPidTable_ =
736  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
737  tablePids ()));
738  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
739  lidToLidTable_ =
740  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
741  tableLids ()));
742  }
743  else { // tie_break is NOT null
744 
745  // For each directory Map LID received, collect all the
746  // corresponding (PID,LID) pairs. If the input Map is not
747  // one-to-one, corresponding directory Map LIDs will have
748  // more than one pair. In that case, we will use the
749  // TieBreak object to pick exactly one pair.
750  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
751  pair_table_type ownedPidLidPairs;
752 
753  // For each directory Map LID received, collect the zero or
754  // more input Map (PID,LID) pairs into ownedPidLidPairs.
755  size_type importIndex = 0;
756  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
757  const GO curGID = importElements[importIndex++];
758  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
759  TEUCHOS_TEST_FOR_EXCEPTION(
760  dirMapLid == LINVALID, std::logic_error,
761  Teuchos::typeName(*this) << " constructor: Incoming global index "
762  << curGID << " does not have a corresponding local index in the "
763  "Directory Map. Please report this bug to the Tpetra developers.");
764  tableKeys[i] = dirMapLid;
765  const int PID = importElements[importIndex++];
766  const int LID = importElements[importIndex++];
767 
768  // These may change below. We fill them in just to ensure
769  // that they won't have invalid values.
770  tablePids[i] = PID;
771  tableLids[i] = LID;
772 
773  // For every directory Map LID, we have to remember all
774  // (PID, LID) pairs. The TieBreak object will arbitrate
775  // between them in the loop below.
776  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
777  }
778 
779  // Use TieBreak to arbitrate between (PID,LID) pairs
780  // corresponding to each directory Map LID.
781  //
782  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
783  // as the directory Map LID?
784  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
785  // contiguous, but the directory map is. Need to set tablePids[i]
786  // and tableLids[i], so need to loop over numReceives (as that is
787  // how those arrays are allocated). FIXED
788 
789  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
790  const LO dirMapLid = tableKeys[i];
791  const std::vector<std::pair<int, LO> >& pidLidList =
792  ownedPidLidPairs[dirMapLid];
793  const size_t listLen = pidLidList.size();
794  if (listLen == 0) continue; // KDD This will never happen
795  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
796  if (listLen > 1) {
797  locallyOneToOne_ = false;
798  }
799  // If there is some (PID,LID) pair for the current input
800  // Map LID, then it makes sense to invoke the TieBreak
801  // object to arbitrate between the options. Even if
802  // there is only one (PID,LID) pair, we still want to
803  // give the TieBreak object a chance to do whatever it
804  // likes to do, in terms of side effects (e.g., track
805  // (PID,LID) pairs).
806  const size_type index =
807  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
808  pidLidList));
809  tablePids[i] = pidLidList[index].first;
810  tableLids[i] = pidLidList[index].second;
811  }
812 
813  // Set up the hash tables.
814  typedef Kokkos::Device<typename NT::execution_space,
815  typename NT::memory_space> DT;
816  lidToPidTable_ =
817  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
818  tablePids ()));
819  lidToLidTable_ =
820  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
821  tableLids ()));
822  }
823  }
824  else {
825  if (tie_break.is_null ()) {
826  // Use array-based implementation of Directory storage.
827  // Allocate these arrays and fill them with invalid values,
828  // in case the input Map's GID list is sparse (i.e., does
829  // not populate all GIDs from minAllGID to maxAllGID).
830  PIDs_ = arcp<int> (dir_numMyEntries);
831  std::fill (PIDs_.begin (), PIDs_.end (), -1);
832  LIDs_ = arcp<LO> (dir_numMyEntries);
833  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
834  // Fill in the arrays with PIDs resp. LIDs.
835  size_type importIndex = 0;
836  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
837  const GO curGID = importElements[importIndex++];
838  const LO curLID = directoryMap_->getLocalElement (curGID);
839  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
840  Teuchos::typeName(*this) << " constructor: Incoming global index "
841  << curGID << " does not have a corresponding local index in the "
842  "Directory Map. Please report this bug to the Tpetra developers.");
843 
844  // If PIDs_[curLID] is not -1, then curGID is a duplicate
845  // on the calling process, so the Directory is not locally
846  // one-to-one.
847  if (PIDs_[curLID] != -1) {
848  locallyOneToOne_ = false;
849  }
850  PIDs_[curLID] = importElements[importIndex++];
851  LIDs_[curLID] = importElements[importIndex++];
852  }
853  }
854  else {
855  PIDs_ = arcp<int> (dir_numMyEntries);
856  LIDs_ = arcp<LO> (dir_numMyEntries);
857  std::fill (PIDs_.begin (), PIDs_.end (), -1);
858 
859  // All received (PID, LID) pairs go into ownedPidLidPairs.
860  // This is a map from the directory Map's LID to the (PID,
861  // LID) pair (where the latter LID comes from the input Map,
862  // not the directory Map). If the input Map is not
863  // one-to-one, corresponding LIDs will have
864  // ownedPidLidPairs[curLID].size() > 1. In that case, we
865  // will use the TieBreak object to pick exactly one pair.
866  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
867  size_type importIndex = 0;
868  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
869  const GO GID = importElements[importIndex++];
870  const int PID = importElements[importIndex++];
871  const LO LID = importElements[importIndex++];
872 
873  const LO dirMapLid = directoryMap_->getLocalElement (GID);
874  TEUCHOS_TEST_FOR_EXCEPTION(
875  dirMapLid == LINVALID, std::logic_error,
876  Teuchos::typeName(*this) << " constructor: Incoming global index "
877  << GID << " does not have a corresponding local index in the "
878  "Directory Map. Please report this bug to the Tpetra developers.");
879  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
880  }
881 
882  // Use TieBreak to arbitrate between (PID,LID) pairs
883  // corresponding to each directory Map LID.
884  //
885  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
886  // as the directory Map LID?
887  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
888  // contiguous. Loop over all ownedPidLidPairs; skip those that have
889  // empty lists. FIXED
890 
891  for (size_t i = 0; i < dir_numMyEntries; ++i) {
892  const std::vector<std::pair<int, LO> >& pidLidList =
893  ownedPidLidPairs[i];
894  const size_t listLen = pidLidList.size();
895  if (listLen == 0) continue; // KDD will happen for GIDs not in
896  // KDD the user's source map
897  const LO dirMapLid = static_cast<LO> (i);
898  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
899  if (listLen > 1) {
900  locallyOneToOne_ = false;
901  }
902  // If there is some (PID,LID) pair for the current input
903  // Map LID, then it makes sense to invoke the TieBreak
904  // object to arbitrate between the options. Even if
905  // there is only one (PID,LID) pair, we still want to
906  // give the TieBreak object a chance to do whatever it
907  // likes to do, in terms of side effects (e.g., track
908  // (PID,LID) pairs).
909  const size_type index =
910  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
911  pidLidList));
912  PIDs_[i] = pidLidList[index].first;
913  LIDs_[i] = pidLidList[index].second;
914  }
915  }
916  }
917  }
918 
919  template<class LO, class GO, class NT>
920  std::string
922  {
923  std::ostringstream os;
924  os << "DistributedNoncontiguousDirectory"
925  << "<" << Teuchos::TypeNameTraits<LO>::name ()
926  << ", " << Teuchos::TypeNameTraits<GO>::name ()
927  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
928  return os.str ();
929  }
930 
931  template<class LO, class GO, class NT>
934  getEntriesImpl (const map_type& map,
935  const Teuchos::ArrayView<const GO> &globalIDs,
936  const Teuchos::ArrayView<int> &nodeIDs,
937  const Teuchos::ArrayView<LO> &localIDs,
938  const bool computeLIDs) const
939  {
940  using Teuchos::Array;
941  using Teuchos::ArrayRCP;
942  using Teuchos::ArrayView;
943  using Teuchos::as;
944  using Teuchos::RCP;
945  using Teuchos::toString;
946  using ::Tpetra::Details::Behavior;
947  using std::cerr;
948  using std::endl;
949  using size_type = typename Array<GO>::size_type;
950  const char funcPrefix[] = "Tpetra::"
951  "DistributedNoncontiguousDirectory::getEntriesImpl: ";
952  const char errSuffix[] =
953  " Please report this bug to the Tpetra developers.";
954 
955  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
956  const bool verbose = Behavior::verbose ("Directory") ||
957  Behavior::verbose ("Tpetra::Directory");
958  std::unique_ptr<std::string> procPrefix;
959  if (verbose) {
960  std::ostringstream os;
961  os << "Proc ";
962  if (map.getComm ().is_null ()) {
963  os << "?";
964  }
965  else {
966  os << map.getComm ()->getRank ();
967  }
968  os << ": ";
969  procPrefix = std::unique_ptr<std::string> (new std::string (os.str ()));
970  os << funcPrefix << "{GIDs: " << toString (globalIDs)
971  << ", PIDs: " << toString (nodeIDs)
972  << ", LIDs: " << toString (localIDs)
973  << ", computeLIDs: " << (computeLIDs ? "true" : "false")
974  << "}" << endl;
975  cerr << os.str ();
976  }
977 
978  const size_t numEntries = globalIDs.size ();
979  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
981 
982  //
983  // Set up directory structure.
984  //
985 
986  // If we're computing LIDs, we also have to include them in each
987  // packet, along with the GID and process ID.
988  const int packetSize = computeLIDs ? 3 : 2;
989 
990  // For data distribution, we use: Surprise! A Distributor!
991  Distributor distor (comm);
992 
993  // Get directory locations for the requested list of entries.
994  Array<int> dirImages (numEntries);
995  if (verbose) {
996  std::ostringstream os;
997  os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
998  << endl;
999  cerr << os.str ();
1000  }
1001  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *procPrefix << "directoryMap_->getRemoteIndexList "
1005  "PIDs result: " << toString (dirImages) << endl;
1006  cerr << os.str ();
1007  }
1008 
1009  // Check for unfound globalIDs and set corresponding nodeIDs to -1
1010  size_t numMissing = 0;
1011  if (res == IDNotPresent) {
1012  for (size_t i=0; i < numEntries; ++i) {
1013  if (dirImages[i] == -1) {
1014  nodeIDs[i] = -1;
1015  if (computeLIDs) {
1016  localIDs[i] = LINVALID;
1017  }
1018  numMissing++;
1019  }
1020  }
1021  }
1022 
1023  Array<GO> sendGIDs;
1024  Array<int> sendImages;
1025  if (verbose) {
1026  std::ostringstream os;
1027  os << *procPrefix << "Call Distributor::createFromRecvs"
1028  << endl;
1029  cerr << os.str ();
1030  }
1031  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1032  if (verbose) {
1033  std::ostringstream os;
1034  os << *procPrefix << "Distributor::createFromRecvs result: "
1035  << "{sendGIDs: " << toString (sendGIDs)
1036  << ", sendPIDs: " << toString (sendImages)
1037  << "}" << endl;
1038  cerr << os.str ();
1039  }
1040  const size_type numSends = sendGIDs.size ();
1041 
1042  //
1043  // mfh 13 Nov 2012:
1044  //
1045  // The code below temporarily stores LO, GO, and int values in
1046  // an array of global_size_t. If one of the signed types (LO
1047  // and GO should both be signed) happened to be -1 (or some
1048  // negative number, but -1 is the one that came up today), then
1049  // conversion to global_size_t will result in a huge
1050  // global_size_t value, and thus conversion back may overflow.
1051  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1052  // all along.)
1053  //
1054  // The overflow normally would not be a problem, since it would
1055  // just go back to -1 again. However, Teuchos::as does range
1056  // checking on conversions in a debug build, so it throws an
1057  // exception (std::range_error) in this case. Range checking is
1058  // generally useful in debug mode, so we don't want to disable
1059  // this behavior globally.
1060  //
1061  // We solve this problem by forgoing use of Teuchos::as for the
1062  // conversions below from LO, GO, or int to global_size_t, and
1063  // the later conversions back from global_size_t to LO, GO, or
1064  // int.
1065  //
1066  // I've recorded this discussion as Bug 5760.
1067  //
1068 
1069  // global_size_t >= GO
1070  // global_size_t >= size_t >= int
1071  // global_size_t >= size_t >= LO
1072  // Therefore, we can safely store all of these in a global_size_t
1073  Array<global_size_t> exports (packetSize * numSends);
1074  {
1075  // Packet format:
1076  // - If computing LIDs: (GID, PID, LID)
1077  // - Otherwise: (GID, PID)
1078  //
1079  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1080 
1081  // Current position to which to write in exports array. If
1082  // sending pairs, we pack the (GID, PID) pair for gid =
1083  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1084  // triples, we pack the (GID, PID, LID) pair for gid =
1085  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1086  size_type exportsIndex = 0;
1087 
1088  if (useHashTables_) {
1089  if (verbose) {
1090  std::ostringstream os;
1091  os << *procPrefix << "Pack exports (useHashTables_ true)"
1092  << endl;
1093  cerr << os.str ();
1094  }
1095  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1096  const GO curGID = sendGIDs[gidIndex];
1097  // Don't use as() here (see above note).
1098  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1099  const LO curLID = directoryMap_->getLocalElement (curGID);
1100  TEUCHOS_TEST_FOR_EXCEPTION
1101  (curLID == LINVALID, std::logic_error, funcPrefix <<
1102  "Directory Map's global index " << curGID << " lacks "
1103  "a corresponding local index." << errSuffix);
1104  // Don't use as() here (see above note).
1105  exports[exportsIndex++] =
1106  static_cast<global_size_t> (lidToPidTable_->get (curLID));
1107  if (computeLIDs) {
1108  // Don't use as() here (see above note).
1109  exports[exportsIndex++] =
1110  static_cast<global_size_t> (lidToLidTable_->get (curLID));
1111  }
1112  }
1113  }
1114  else {
1115  if (verbose) {
1116  std::ostringstream os;
1117  os << *procPrefix << "Pack exports (useHashTables_ false)"
1118  << endl;
1119  cerr << os.str ();
1120  }
1121  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1122  const GO curGID = sendGIDs[gidIndex];
1123  // Don't use as() here (see above note).
1124  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1125  const LO curLID = directoryMap_->getLocalElement (curGID);
1126  TEUCHOS_TEST_FOR_EXCEPTION
1127  (curLID == LINVALID, std::logic_error, funcPrefix <<
1128  "Directory Map's global index " << curGID << " lacks "
1129  "a corresponding local index." << errSuffix);
1130  // Don't use as() here (see above note).
1131  exports[exportsIndex++] =
1132  static_cast<global_size_t> (PIDs_[curLID]);
1133  if (computeLIDs) {
1134  // Don't use as() here (see above note).
1135  exports[exportsIndex++] =
1136  static_cast<global_size_t> (LIDs_[curLID]);
1137  }
1138  }
1139  }
1140 
1141  TEUCHOS_TEST_FOR_EXCEPTION
1142  (exportsIndex > exports.size (), std::logic_error,
1143  funcPrefix << "On Process " << comm->getRank () << ", "
1144  "exportsIndex = " << exportsIndex << " > exports.size() = "
1145  << exports.size () << "." << errSuffix);
1146  }
1147 
1148  TEUCHOS_TEST_FOR_EXCEPTION
1149  (numEntries < numMissing, std::logic_error, funcPrefix <<
1150  "On Process " << comm->getRank () << ", numEntries = " <<
1151  numEntries << " < numMissing = " << numMissing << "." <<
1152  errSuffix);
1153 
1154  //
1155  // mfh 13 Nov 2012: See note above on conversions between
1156  // global_size_t and LO, GO, or int.
1157  //
1158  const size_t numRecv = numEntries - numMissing;
1159 
1160  {
1161  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1162  const size_t requiredImportLen = numRecv * packetSize;
1163  const int myRank = comm->getRank ();
1164  TEUCHOS_TEST_FOR_EXCEPTION(
1165  importLen < requiredImportLen, std::logic_error,
1166  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1167  "On Process " << myRank << ": The 'imports' array must have length "
1168  "at least " << requiredImportLen << ", but its actual length is " <<
1169  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1170  packetSize << ", numEntries (# GIDs): " << numEntries <<
1171  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1172  << distor.getTotalReceiveLength () << ". " << std::endl <<
1173  "Distributor description: " << distor.description () << ". "
1174  << std::endl <<
1175  "Please report this bug to the Tpetra developers.");
1176  }
1177 
1178  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1179  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1180  // with communication, by splitting this call into doPosts and
1181  // doWaits. The code is still correct in this form, however.
1182  if (verbose) {
1183  std::ostringstream os;
1184  os << *procPrefix << "Call doPostsAndWaits: {packetSize: "
1185  << packetSize << ", exports: " << toString (exports) << "}"
1186  << endl;
1187  cerr << os.str ();
1188  }
1189  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1190  if (verbose) {
1191  std::ostringstream os;
1192  os << *procPrefix << "doPostsAndWaits result: "
1193  << toString (imports) << endl;
1194  cerr << os.str ();
1195  }
1196 
1197  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1198  Array<GO> offset (numEntries); // permutation array (sort2 output)
1199  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1200  offset[ii] = ii;
1201  }
1202  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1203  if (verbose) {
1204  std::ostringstream os;
1205  os << *procPrefix << "sortedIDs: " << toString (sortedIDs)
1206  << ", offset: " << toString (offset) << endl;
1207  cerr << os.str ();
1208  }
1209 
1210  size_t importsIndex = 0;
1211 
1212  // we know these conversions are in range, because we loaded this data
1213  //
1214  // Don't use as() for conversions here; we know they are in range.
1215  for (size_t i = 0; i < numRecv; ++i) {
1216  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1217  auto p1 = std::equal_range (sortedIDs.begin(),
1218  sortedIDs.end(), curGID);
1219  if (p1.first != p1.second) {
1220  const size_t j = p1.first - sortedIDs.begin();
1221  nodeIDs[offset[j]] =
1222  static_cast<int> (imports[importsIndex++]);
1223  if (computeLIDs) {
1224  localIDs[offset[j]] =
1225  static_cast<LO> (imports[importsIndex++]);
1226  }
1227  if (nodeIDs[offset[j]] == -1) {
1228  res = IDNotPresent;
1229  }
1230  }
1231  }
1232 
1233  TEUCHOS_TEST_FOR_EXCEPTION
1234  (size_t (importsIndex) > size_t (imports.size ()),
1235  std::logic_error, funcPrefix << "On Process " <<
1236  comm->getRank () << ": importsIndex = " << importsIndex
1237  << " > imports.size() = " << imports.size () << ". "
1238  "numRecv: " << numRecv
1239  << ", packetSize: " << packetSize << ", "
1240  "numEntries (# GIDs): " << numEntries
1241  << ", numMissing: " << numMissing
1242  << ": distor.getTotalReceiveLength(): "
1243  << distor.getTotalReceiveLength () << "." << errSuffix);
1244  if (verbose) {
1245  std::ostringstream os;
1246  os << *procPrefix << funcPrefix << "Done!" << endl;
1247  cerr << os.str ();
1248  }
1249  return res;
1250  }
1251 
1252 
1253  template<class LO, class GO, class NT>
1254  bool
1256  isOneToOne (const Teuchos::Comm<int>& comm) const
1257  {
1258  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1259  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1260  int gbl121 = 0;
1261  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1262  Teuchos::outArg (gbl121));
1263  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1264  }
1265  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1266  }
1267  } // namespace Details
1268 } // namespace Tpetra
1269 
1270 //
1271 // Explicit instantiation macro
1272 //
1273 // Must be expanded from within the Tpetra namespace!
1274 //
1275 #define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1276  namespace Details { \
1277  template class Directory< LO , GO , NODE >; \
1278  template class ReplicatedDirectory< LO , GO , NODE >; \
1279  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1280  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1281  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1282  }
1283 
1284 #endif // __Tpetra_DirectoryImpl_def_hpp
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
void initialize(int *argc, char ***argv)
Initialize Tpetra.
Interface for breaking ties in ownership.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
Return a one-line description of this object.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Implementation of Directory for a distributed noncontiguous Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
Implementation of Directory for a distributed contiguous Map.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
A one-line human-readable description of this object.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A parallel distribution of indices over processes.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
std::string description() const
A one-line human-readable description of this object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Implementation of Directory for a contiguous, uniformly distributed Map.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
global_size_t getGlobalNumElements() const
The number of elements in this Map.