Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
14 
15 #ifndef TPETRA_MAP_DEF_HPP
16 #define TPETRA_MAP_DEF_HPP
17 
18 #include <memory>
19 #include <sstream>
20 #include <stdexcept>
21 #include <typeinfo>
22 
23 #include "Teuchos_as.hpp"
24 #include "Teuchos_TypeNameTraits.hpp"
25 #include "Teuchos_CommHelpers.hpp"
26 
27 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29 #include "Tpetra_Details_FixedHashTable.hpp"
32 #include "Tpetra_Core.hpp"
33 #include "Tpetra_Util.hpp"
34 #include "Tpetra_Details_mpiIsInitialized.hpp"
35 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38 
39 namespace { // (anonymous)
40 
41  void
42  checkMapInputArray (const char ctorName[],
43  const void* indexList,
44  const size_t indexListSize,
45  const Teuchos::Comm<int>* const comm)
46  {
48 
49  const bool debug = Behavior::debug("Map");
50  if (debug) {
51  using Teuchos::outArg;
52  using Teuchos::REDUCE_MIN;
53  using Teuchos::reduceAll;
54  using std::endl;
55 
56  const int myRank = comm == nullptr ? 0 : comm->getRank ();
57  const bool verbose = Behavior::verbose("Map");
58  std::ostringstream lclErrStrm;
59  int lclSuccess = 1;
60 
61  if (indexListSize != 0 && indexList == nullptr) {
62  lclSuccess = 0;
63  if (verbose) {
64  lclErrStrm << "Proc " << myRank << ": indexList is null, "
65  "but indexListSize=" << indexListSize << " != 0." << endl;
66  }
67  }
68  int gblSuccess = 0; // output argument
69  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
70  if (gblSuccess != 1) {
71  std::ostringstream gblErrStrm;
72  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
73  " detected a problem with the input array "
74  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
75  "of global indices." << endl;
76  if (verbose) {
77  using ::Tpetra::Details::gathervPrint;
78  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
79  }
80  TEUCHOS_TEST_FOR_EXCEPTION
81  (true, std::invalid_argument, gblErrStrm.str ());
82  }
83  }
84  }
85 
86 
87 
88 
89  template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
90  void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal & minMyGID, GlobalOrdinal & maxMyGID, GlobalOrdinal &firstContiguousGID, GlobalOrdinal &lastContiguousGID_val, LocalOrdinal &lastContiguousGID_loc) {
91  using LO = LocalOrdinal;
92  using GO = GlobalOrdinal;
93  using exec_space = typename ViewType::device_type::execution_space;
94  using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO> >;
95  const LO numLocalElements = entryList.extent(0);
96 
97  // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
98  // ride, rather than the other way around
99  typedef typename Kokkos::MinLoc<LO,GO>::value_type minloc_type;
100  minloc_type myMinLoc;
101 
102  // Find the initial sequence of parallel gids
103  // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
104  // We want the one *before* that guy.
105  Kokkos::parallel_reduce(range_policy(0,numLocalElements),KOKKOS_LAMBDA(const LO & i, GO &l_myMin, GO&l_myMax, GO& l_firstCont, minloc_type & l_lastCont){
106  GO entry_0 = entryList[0];
107  GO entry_i = entryList[i];
108 
109  // Easy stuff
110  l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
111  l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
112  l_firstCont = entry_0;
113 
114  if(entry_i - entry_0 != i && l_lastCont.val >= i) {
115  // We're non-contiguous, so the guy before us could be the last contiguous guy
116  l_lastCont.val = i-1;
117  l_lastCont.loc = entryList[i-1];
118  }
119  else if (i == numLocalElements-1 && i < l_lastCont.val) {
120  // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
121  l_lastCont.val = i;
122  l_lastCont.loc = entry_i;
123  }
124 
125  },Kokkos::Min<GO>(minMyGID),Kokkos::Max<GO>(maxMyGID),Kokkos::Min<GO>(firstContiguousGID),Kokkos::MinLoc<LO,GO>(myMinLoc));
126 
127  // This switch is intentional, since we're using MinLoc backwards
128  lastContiguousGID_val = myMinLoc.loc;
129  lastContiguousGID_loc = myMinLoc.val;
130  }
131 
132 
133 } // namespace (anonymous)
134 
135 namespace Tpetra {
136 
137  template <class LocalOrdinal, class GlobalOrdinal, class Node>
139  Map () :
140  comm_ (new Teuchos::SerialComm<int> ()),
141  indexBase_ (0),
142  numGlobalElements_ (0),
143  numLocalElements_ (0),
144  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
145  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
146  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
147  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
148  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
149  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
150  uniform_ (false), // trivially
151  contiguous_ (false),
152  distributed_ (false), // no communicator yet
153  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
154  {
157  }
158 
159 
160  template <class LocalOrdinal, class GlobalOrdinal, class Node>
162  Map (const global_size_t numGlobalElements,
163  const global_ordinal_type indexBase,
164  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
165  const LocalGlobal lOrG) :
166  comm_ (comm),
167  uniform_ (true),
168  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
169  {
170  using Teuchos::as;
171  using Teuchos::broadcast;
172  using Teuchos::outArg;
173  using Teuchos::reduceAll;
174  using Teuchos::REDUCE_MIN;
175  using Teuchos::REDUCE_MAX;
176  using Teuchos::typeName;
177  using std::endl;
178  using GO = global_ordinal_type;
179  using GST = global_size_t;
180  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
181  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
182  const char exPfx[] =
183  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
184 
185  const bool debug = Details::Behavior::debug("Map");
186  const bool verbose = Details::Behavior::verbose("Map");
187  std::unique_ptr<std::string> prefix;
188  if (verbose) {
189  prefix = Details::createPrefix(
190  comm_.getRawPtr(), "Map", funcName);
191  std::ostringstream os;
192  os << *prefix << "Start" << endl;
193  std::cerr << os.str();
194  }
197 
198  // In debug mode only, check whether numGlobalElements and
199  // indexBase are the same over all processes in the communicator.
200  if (debug) {
201  GST proc0NumGlobalElements = numGlobalElements;
202  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
203  GST minNumGlobalElements = numGlobalElements;
204  GST maxNumGlobalElements = numGlobalElements;
205  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
206  outArg(minNumGlobalElements));
207  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
208  outArg(maxNumGlobalElements));
209  TEUCHOS_TEST_FOR_EXCEPTION
210  (minNumGlobalElements != maxNumGlobalElements ||
211  numGlobalElements != minNumGlobalElements,
212  std::invalid_argument, exPfx << "All processes must "
213  "provide the same number of global elements. Process 0 set "
214  "numGlobalElements="<< proc0NumGlobalElements << ". The "
215  "calling process " << comm->getRank() << " set "
216  "numGlobalElements=" << numGlobalElements << ". The min "
217  "and max values over all processes are "
218  << minNumGlobalElements << " resp. " << maxNumGlobalElements
219  << ".");
220 
221  GO proc0IndexBase = indexBase;
222  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
223  GO minIndexBase = indexBase;
224  GO maxIndexBase = indexBase;
225  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
226  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
227  TEUCHOS_TEST_FOR_EXCEPTION
228  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
229  std::invalid_argument, exPfx << "All processes must "
230  "provide the same indexBase argument. Process 0 set "
231  "indexBase=" << proc0IndexBase << ". The calling process "
232  << comm->getRank() << " set indexBase=" << indexBase
233  << ". The min and max values over all processes are "
234  << minIndexBase << " resp. " << maxIndexBase << ".");
235  }
236 
237  // Distribute the elements across the processes in the given
238  // communicator so that global IDs (GIDs) are
239  //
240  // - Nonoverlapping (only one process owns each GID)
241  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
242  // adjacent GIDs differ by more than one)
243  // - As evenly distributed as possible (the numbers of GIDs on two
244  // different processes do not differ by more than one)
245 
246  // All processes have the same numGlobalElements, but we still
247  // need to check that it is valid. numGlobalElements must be
248  // positive and not the "invalid" value (GSTI).
249  //
250  // This comparison looks funny, but it avoids compiler warnings
251  // for comparing unsigned integers (numGlobalElements_in is a
252  // GST, which is unsigned) while still working if we
253  // later decide to make GST signed.
254  TEUCHOS_TEST_FOR_EXCEPTION(
255  (numGlobalElements < 1 && numGlobalElements != 0),
256  std::invalid_argument, exPfx << "numGlobalElements (= "
257  << numGlobalElements << ") must be nonnegative.");
258 
259  TEUCHOS_TEST_FOR_EXCEPTION
260  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
261  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
262  "Tpetra::global_size_t>::invalid(). This version of the "
263  "constructor requires a valid value of numGlobalElements. "
264  "You probably mistook this constructor for the \"contiguous "
265  "nonuniform\" constructor, which can compute the global "
266  "number of elements for you if you set numGlobalElements to "
267  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
268 
269  size_t numLocalElements = 0; // will set below
270  if (lOrG == GloballyDistributed) {
271  // Compute numLocalElements:
272  //
273  // If numGlobalElements == numProcs * B + remainder,
274  // then Proc r gets B+1 elements if r < remainder,
275  // and B elements if r >= remainder.
276  //
277  // This strategy is valid for any value of numGlobalElements and
278  // numProcs, including the following border cases:
279  // - numProcs == 1
280  // - numLocalElements < numProcs
281  //
282  // In the former case, remainder == 0 && numGlobalElements ==
283  // numLocalElements. In the latter case, remainder ==
284  // numGlobalElements && numLocalElements is either 0 or 1.
285  const GST numProcs = static_cast<GST> (comm_->getSize ());
286  const GST myRank = static_cast<GST> (comm_->getRank ());
287  const GST quotient = numGlobalElements / numProcs;
288  const GST remainder = numGlobalElements - quotient * numProcs;
289 
290  GO startIndex;
291  if (myRank < remainder) {
292  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
293  // myRank was originally an int, so it should never overflow
294  // reasonable GO types.
295  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
296  } else {
297  numLocalElements = as<size_t> (quotient);
298  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
299  as<GO> (remainder);
300  }
301 
302  minMyGID_ = indexBase + startIndex;
303  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
304  minAllGID_ = indexBase;
305  maxAllGID_ = indexBase + numGlobalElements - 1;
306  distributed_ = (numProcs > 1);
307  }
308  else { // lOrG == LocallyReplicated
309  numLocalElements = as<size_t> (numGlobalElements);
310  minMyGID_ = indexBase;
311  maxMyGID_ = indexBase + numGlobalElements - 1;
312  distributed_ = false;
313  }
314 
315  minAllGID_ = indexBase;
316  maxAllGID_ = indexBase + numGlobalElements - 1;
317  indexBase_ = indexBase;
318  numGlobalElements_ = numGlobalElements;
319  numLocalElements_ = numLocalElements;
320  firstContiguousGID_ = minMyGID_;
321  lastContiguousGID_ = maxMyGID_;
322  contiguous_ = true;
323 
324  // Create the Directory on demand in getRemoteIndexList().
325  //setupDirectory ();
326 
327  if (verbose) {
328  std::ostringstream os;
329  os << *prefix << "Done" << endl;
330  std::cerr << os.str();
331  }
332  }
333 
334 
335  template <class LocalOrdinal, class GlobalOrdinal, class Node>
337  Map (const global_size_t numGlobalElements,
338  const size_t numLocalElements,
339  const global_ordinal_type indexBase,
340  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
341  comm_ (comm),
342  uniform_ (false),
343  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
344  {
345  using Teuchos::as;
346  using Teuchos::broadcast;
347  using Teuchos::outArg;
348  using Teuchos::reduceAll;
349  using Teuchos::REDUCE_MIN;
350  using Teuchos::REDUCE_MAX;
351  using Teuchos::REDUCE_SUM;
352  using Teuchos::scan;
353  using std::endl;
354  using GO = global_ordinal_type;
355  using GST = global_size_t;
356  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
357  const char funcName[] =
358  "Map(gblNumInds,lclNumInds,indexBase,comm)";
359  const char exPfx[] =
360  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
361  const char suffix[] =
362  ". Please report this bug to the Tpetra developers.";
363 
364  const bool debug = Details::Behavior::debug("Map");
365  const bool verbose = Details::Behavior::verbose("Map");
366  std::unique_ptr<std::string> prefix;
367  if (verbose) {
368  prefix = Details::createPrefix(
369  comm_.getRawPtr(), "Map", funcName);
370  std::ostringstream os;
371  os << *prefix << "Start" << endl;
372  std::cerr << os.str();
373  }
376 
377  // Global sum of numLocalElements over all processes.
378  // Keep this for later debug checks.
379  GST debugGlobalSum {};
380  if (debug) {
381  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
382  numGlobalElements, numLocalElements, indexBase, comm);
383  }
384 
385  // Distribute the elements across the nodes so that they are
386  // - non-overlapping
387  // - contiguous
388 
389  // This differs from the first Map constructor (that only takes a
390  // global number of elements) in that the user has specified the
391  // number of local elements, so that the elements are not
392  // (necessarily) evenly distributed over the processes.
393 
394  // Compute my local offset. This is an inclusive scan, so to get
395  // the final offset, we subtract off the input.
396  GO scanResult = 0;
397  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
398  const GO myOffset = scanResult - numLocalElements;
399 
400  if (numGlobalElements != GSTI) {
401  numGlobalElements_ = numGlobalElements; // Use the user's value.
402  }
403  else {
404  // Inclusive scan means that the last process has the final sum.
405  // Rather than doing a reduceAll to get the sum of
406  // numLocalElements, we can just have the last process broadcast
407  // its result. That saves us a round of log(numProcs) messages.
408  const int numProcs = comm->getSize ();
409  GST globalSum = scanResult;
410  if (numProcs > 1) {
411  broadcast (*comm, numProcs - 1, outArg (globalSum));
412  }
413  numGlobalElements_ = globalSum;
414 
415  if (debug) {
416  // No need for an all-reduce here; both come from collectives.
417  TEUCHOS_TEST_FOR_EXCEPTION
418  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
419  "globalSum = " << globalSum << " != debugGlobalSum = " <<
420  debugGlobalSum << suffix);
421  }
422  }
423  numLocalElements_ = numLocalElements;
424  indexBase_ = indexBase;
425  minAllGID_ = (numGlobalElements_ == 0) ?
426  std::numeric_limits<GO>::max () :
427  indexBase;
428  maxAllGID_ = (numGlobalElements_ == 0) ?
429  std::numeric_limits<GO>::lowest () :
430  indexBase + GO(numGlobalElements_) - GO(1);
431  minMyGID_ = (numLocalElements_ == 0) ?
432  std::numeric_limits<GO>::max () :
433  indexBase + GO(myOffset);
434  maxMyGID_ = (numLocalElements_ == 0) ?
435  std::numeric_limits<GO>::lowest () :
436  indexBase + myOffset + GO(numLocalElements) - GO(1);
437  firstContiguousGID_ = minMyGID_;
438  lastContiguousGID_ = maxMyGID_;
439  contiguous_ = true;
440  distributed_ = checkIsDist ();
441 
442  // Create the Directory on demand in getRemoteIndexList().
443  //setupDirectory ();
444 
445  if (verbose) {
446  std::ostringstream os;
447  os << *prefix << "Done" << endl;
448  std::cerr << os.str();
449  }
450  }
451 
452  template <class LocalOrdinal, class GlobalOrdinal, class Node>
456  const char errorMessagePrefix[],
457  const global_size_t numGlobalElements,
458  const size_t numLocalElements,
459  const global_ordinal_type indexBase,
460  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
461  {
462  const bool debug = Details::Behavior::debug("Map");
463  if (! debug) {
464  return global_size_t(0);
465  }
466 
467  using Teuchos::broadcast;
468  using Teuchos::outArg;
469  using Teuchos::ptr;
470  using Teuchos::REDUCE_MAX;
471  using Teuchos::REDUCE_MIN;
472  using Teuchos::REDUCE_SUM;
473  using Teuchos::reduceAll;
474  using GO = global_ordinal_type;
475  using GST = global_size_t;
476  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
477 
478  // The user has specified the distribution of indices over the
479  // processes. The distribution is not necessarily contiguous or
480  // equally shared over the processes.
481  //
482  // We assume that the number of local elements can be stored in a
483  // size_t. The instance member numLocalElements_ is a size_t, so
484  // this variable and that should have the same type.
485 
486  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
487  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
488  outArg (debugGlobalSum));
489  // In debug mode only, check whether numGlobalElements and
490  // indexBase are the same over all processes in the communicator.
491  {
492  GST proc0NumGlobalElements = numGlobalElements;
493  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
494  GST minNumGlobalElements = numGlobalElements;
495  GST maxNumGlobalElements = numGlobalElements;
496  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
497  outArg (minNumGlobalElements));
498  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
499  outArg (maxNumGlobalElements));
500  TEUCHOS_TEST_FOR_EXCEPTION
501  (minNumGlobalElements != maxNumGlobalElements ||
502  numGlobalElements != minNumGlobalElements,
503  std::invalid_argument, errorMessagePrefix << "All processes "
504  "must provide the same number of global elements, even if "
505  "that argument is "
506  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
507  "(which signals that the Map should compute the global "
508  "number of elements). Process 0 set numGlobalElements"
509  "=" << proc0NumGlobalElements << ". The calling process "
510  << comm->getRank() << " set numGlobalElements=" <<
511  numGlobalElements << ". The min and max values over all "
512  "processes are " << minNumGlobalElements << " resp. " <<
513  maxNumGlobalElements << ".");
514 
515  GO proc0IndexBase = indexBase;
516  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
517  GO minIndexBase = indexBase;
518  GO maxIndexBase = indexBase;
519  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
520  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
521  TEUCHOS_TEST_FOR_EXCEPTION
522  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
523  std::invalid_argument, errorMessagePrefix <<
524  "All processes must provide the same indexBase argument. "
525  "Process 0 set indexBase = " << proc0IndexBase << ". The "
526  "calling process " << comm->getRank() << " set indexBase="
527  << indexBase << ". The min and max values over all "
528  "processes are " << minIndexBase << " resp. " << maxIndexBase
529  << ".");
530 
531  // Make sure that the sum of numLocalElements over all processes
532  // equals numGlobalElements.
533  TEUCHOS_TEST_FOR_EXCEPTION
534  (numGlobalElements != GSTI &&
535  debugGlobalSum != numGlobalElements, std::invalid_argument,
536  errorMessagePrefix << "The sum of each process' number of "
537  "indices over all processes, " << debugGlobalSum << ", != "
538  << "numGlobalElements=" << numGlobalElements << ". If you "
539  "would like this constructor to compute numGlobalElements "
540  "for you, you may set numGlobalElements="
541  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
542  "on input. Please note that this is NOT necessarily -1.");
543  }
544  return debugGlobalSum;
545  }
546 
547  template <class LocalOrdinal, class GlobalOrdinal, class Node>
548  void
549  Map<LocalOrdinal,GlobalOrdinal,Node>::
550  initWithNonownedHostIndexList(
551  const char errorMessagePrefix[],
552  const global_size_t numGlobalElements,
553  const Kokkos::View<const global_ordinal_type*,
554  Kokkos::LayoutLeft,
555  Kokkos::HostSpace,
556  Kokkos::MemoryUnmanaged>& entryList_host,
557  const global_ordinal_type indexBase,
558  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
559  {
560  Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
561 
562  using Kokkos::LayoutLeft;
563  using Kokkos::subview;
564  using Kokkos::View;
565  using Kokkos::view_alloc;
566  using Kokkos::WithoutInitializing;
567  using Teuchos::as;
568  using Teuchos::broadcast;
569  using Teuchos::outArg;
570  using Teuchos::ptr;
571  using Teuchos::REDUCE_MAX;
572  using Teuchos::REDUCE_MIN;
573  using Teuchos::REDUCE_SUM;
574  using Teuchos::reduceAll;
575  using LO = local_ordinal_type;
576  using GO = global_ordinal_type;
577  using GST = global_size_t;
578  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
579  // Make sure that Kokkos has been initialized (Github Issue #513).
580  TEUCHOS_TEST_FOR_EXCEPTION
581  (! Kokkos::is_initialized (), std::runtime_error,
582  errorMessagePrefix << "The Kokkos execution space "
583  << Teuchos::TypeNameTraits<execution_space>::name()
584  << " has not been initialized. "
585  "Please initialize it before creating a Map.")
586 
587  // The user has specified the distribution of indices over the
588  // processes, via the input array of global indices on each
589  // process. The distribution is not necessarily contiguous or
590  // equally shared over the processes.
591 
592  // The length of the input array on this process is the number of
593  // local indices to associate with this process, even though the
594  // input array contains global indices. We assume that the number
595  // of local indices on a process can be stored in a size_t;
596  // numLocalElements_ is a size_t, so this variable and that should
597  // have the same type.
598  const size_t numLocalElements(entryList_host.size());
599 
600  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
601  numLocalElements, indexBase, comm);
602 
603  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
604  // reduction is redundant, since the directory Map will have to do
605  // the same thing. Thus, we could do the scan and broadcast for
606  // the directory Map here, and give the computed offsets to the
607  // directory Map's constructor. However, a reduction costs less
608  // than a scan and broadcast, so this still saves time if users of
609  // this Map don't ever need the Directory (i.e., if they never
610  // call getRemoteIndexList on this Map).
611  if (numGlobalElements != GSTI) {
612  numGlobalElements_ = numGlobalElements; // Use the user's value.
613  }
614  else { // The user wants us to compute the sum.
615  reduceAll(*comm, REDUCE_SUM,
616  static_cast<GST>(numLocalElements),
617  outArg(numGlobalElements_));
618  }
619 
620  // mfh 20 Feb 2013: We've never quite done the right thing for
621  // duplicate GIDs here. Duplicate GIDs have always been counted
622  // distinctly in numLocalElements_, and thus should get a
623  // different LID. However, we've always used std::map or a hash
624  // table for the GID -> LID lookup table, so distinct GIDs always
625  // map to the same LID. Furthermore, the order of the input GID
626  // list matters, so it's not desirable to sort for determining
627  // uniqueness.
628  //
629  // I've chosen for now to write this code as if the input GID list
630  // contains no duplicates. If this is not desired, we could use
631  // the lookup table itself to determine uniqueness: If we haven't
632  // seen the GID before, it gets a new LID and it's added to the
633  // LID -> GID and GID -> LID tables. If we have seen the GID
634  // before, it doesn't get added to either table. I would
635  // implement this, but it would cost more to do the double lookups
636  // in the table (one to check, and one to insert).
637  //
638  // More importantly, since we build the GID -> LID table in (a
639  // thread-) parallel (way), the order in which duplicate GIDs may
640  // get inserted is not defined. This would make the assignment of
641  // LID to GID nondeterministic.
642 
643  numLocalElements_ = numLocalElements;
644  indexBase_ = indexBase;
645 
646  minMyGID_ = indexBase_;
647  maxMyGID_ = indexBase_;
648 
649  // NOTE (mfh 27 May 2015): While finding the initial contiguous
650  // GID range requires looking at all the GIDs in the range,
651  // dismissing an interval of GIDs only requires looking at the
652  // first and last GIDs. Thus, we could do binary search backwards
653  // from the end in order to catch the common case of a contiguous
654  // interval followed by noncontiguous entries. On the other hand,
655  // we could just expose this case explicitly as yet another Map
656  // constructor, and avoid the trouble of detecting it.
657  if (numLocalElements_ > 0) {
658  // Find contiguous GID range, with the restriction that the
659  // beginning of the range starts with the first entry. While
660  // doing so, fill in the LID -> GID table.
661  typename decltype (lgMap_)::non_const_type lgMap
662  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
663  auto lgMap_host =
664  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
665 
666  // The input array entryList_host is already on host, so we
667  // don't need to take a host view of it.
668  // auto entryList_host =
669  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
670  // Kokkos::deep_copy (entryList_host, entryList);
671 
672  firstContiguousGID_ = entryList_host[0];
673  lastContiguousGID_ = firstContiguousGID_+1;
674 
675  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
676  // anyway, so we have to look at them all. The logical way to
677  // find the first noncontiguous entry would thus be to "reduce,"
678  // where the local reduction result is whether entryList[i] + 1
679  // == entryList[i+1].
680 
681  lgMap_host[0] = firstContiguousGID_;
682  size_t i = 1;
683  for ( ; i < numLocalElements_; ++i) {
684  const GO curGid = entryList_host[i];
685  const LO curLid = as<LO> (i);
686 
687  if (lastContiguousGID_ != curGid) break;
688 
689  // Add the entry to the LID->GID table only after we know that
690  // the current GID is in the initial contiguous sequence, so
691  // that we don't repeat adding it in the first iteration of
692  // the loop below over the remaining noncontiguous GIDs.
693  lgMap_host[curLid] = curGid;
694  ++lastContiguousGID_;
695  }
696  --lastContiguousGID_;
697  // NOTE: i is the first non-contiguous index.
698 
699  // [firstContiguousGID_, lastContigousGID_] is the initial
700  // sequence of contiguous GIDs. We can start the min and max
701  // GID using this range.
702  minMyGID_ = firstContiguousGID_;
703  maxMyGID_ = lastContiguousGID_;
704 
705  // Compute the GID -> LID lookup table, _not_ including the
706  // initial sequence of contiguous GIDs.
707  LO firstNonContiguous_loc=i;
708  {
709 
710  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
711  auto nonContigGids_host = subview (entryList_host, ncRange);
712  TEUCHOS_TEST_FOR_EXCEPTION
713  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
714  static_cast<size_t> (entryList_host.extent (0) - i),
715  std::logic_error, "Tpetra::Map noncontiguous constructor: "
716  "nonContigGids_host.extent(0) = "
717  << nonContigGids_host.extent (0)
718  << " != entryList_host.extent(0) - i = "
719  << (entryList_host.extent (0) - i) << " = "
720  << entryList_host.extent (0) << " - " << i
721  << ". Please report this bug to the Tpetra developers.");
722 
723  // FixedHashTable's constructor expects an owned device View,
724  // so we must deep-copy the subview of the input indices.
725  View<GO*, LayoutLeft, device_type>
726  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
727  nonContigGids_host.size ());
728 
729 
730  // DEEP_COPY REVIEW - HOST-TO-DEVICE
731  Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
732  Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
733 
734  glMap_ = global_to_local_table_type(nonContigGids,
735  firstNonContiguous_loc);
736  // Make host version - when memory spaces match these just do trivial assignment
737  glMapHost_ = global_to_local_table_host_type(glMap_);
738  }
739 
740  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
741  // table above, we have to look at all the (noncontiguous) input
742  // indices anyway. Thus, why not have the constructor compute
743  // and return the min and max?
744 
745  for ( ; i < numLocalElements_; ++i) {
746  const GO curGid = entryList_host[i];
747  const LO curLid = static_cast<LO> (i);
748  lgMap_host[curLid] = curGid; // LID -> GID table
749 
750  // While iterating through entryList, we compute its
751  // (process-local) min and max elements.
752  if (curGid < minMyGID_) {
753  minMyGID_ = curGid;
754  }
755  if (curGid > maxMyGID_) {
756  maxMyGID_ = curGid;
757  }
758  }
759 
760  // We filled lgMap on host above; now sync back to device.
761  // DEEP_COPY REVIEW - HOST-TO-DEVICE
762  Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
763 
764  // "Commit" the local-to-global lookup table we filled in above.
765  lgMap_ = lgMap;
766  // We've already created this, so use it.
767  lgMapHost_ = lgMap_host;
768 
769 
770  }
771  else {
772  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
773  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
774  // This insures tests for GIDs in the range
775  // [firstContiguousGID_, lastContiguousGID_] fail for processes
776  // with no local elements.
777  firstContiguousGID_ = indexBase_+1;
778  lastContiguousGID_ = indexBase_;
779  // glMap_ was default constructed, so it's already empty.
780 
781  }
782 
783 
784 
785 
786  // Compute the min and max of all processes' GIDs. If
787  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
788  // are both indexBase_. This is wrong, but fixing it would
789  // require either a fancy sparse all-reduce, or a custom reduction
790  // operator that ignores invalid values ("invalid" means
791  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
792  //
793  // Also, while we're at it, use the same all-reduce to figure out
794  // if the Map is distributed. "Distributed" means that there is
795  // at least one process with a number of local elements less than
796  // the number of global elements.
797  //
798  // We're computing the min and max of all processes' GIDs using a
799  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
800  // and y are signed). (This lets us combine the min and max into
801  // a single all-reduce.) If each process sets localDist=1 if its
802  // number of local elements is strictly less than the number of
803  // global elements, and localDist=0 otherwise, then a MAX
804  // all-reduce on localDist tells us if the Map is distributed (1
805  // if yes, 0 if no). Thus, we can append localDist onto the end
806  // of the data and get the global result from the all-reduce.
807  if (std::numeric_limits<GO>::is_signed) {
808  // Does my process NOT own all the elements?
809  const GO localDist =
810  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
811 
812  GO minMaxInput[3];
813  minMaxInput[0] = -minMyGID_;
814  minMaxInput[1] = maxMyGID_;
815  minMaxInput[2] = localDist;
816 
817  GO minMaxOutput[3];
818  minMaxOutput[0] = 0;
819  minMaxOutput[1] = 0;
820  minMaxOutput[2] = 0;
821  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
822  minAllGID_ = -minMaxOutput[0];
823  maxAllGID_ = minMaxOutput[1];
824  const GO globalDist = minMaxOutput[2];
825  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
826  }
827  else { // unsigned; use two reductions
828  // This is always correct, no matter the signedness of GO.
829  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
830  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
831  distributed_ = checkIsDist ();
832  }
833 
834  contiguous_ = false; // "Contiguous" is conservative.
835 
836  TEUCHOS_TEST_FOR_EXCEPTION(
837  minAllGID_ < indexBase_,
838  std::invalid_argument,
839  "Tpetra::Map constructor (noncontiguous): "
840  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
841  "less than the given indexBase = " << indexBase_ << ".");
842 
843  // Create the Directory on demand in getRemoteIndexList().
844  //setupDirectory ();
845  }
846 
847  template <class LocalOrdinal, class GlobalOrdinal, class Node>
849  Map (const global_size_t numGlobalElements,
850  const GlobalOrdinal indexList[],
851  const LocalOrdinal indexListSize,
852  const GlobalOrdinal indexBase,
853  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
854  comm_ (comm),
855  uniform_ (false),
856  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
857  {
858  using std::endl;
859  const char funcName[] =
860  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
861 
862  const bool verbose = Details::Behavior::verbose("Map");
863  std::unique_ptr<std::string> prefix;
864  if (verbose) {
865  prefix = Details::createPrefix(
866  comm_.getRawPtr(), "Map", funcName);
867  std::ostringstream os;
868  os << *prefix << "Start" << endl;
869  std::cerr << os.str();
870  }
874  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
875  indexList, static_cast<size_t> (indexListSize),
876  comm.getRawPtr ());
877  // Not quite sure if I trust all code to behave correctly if the
878  // pointer is nonnull but the array length is nonzero, so I'll
879  // make sure the raw pointer is null if the length is zero.
880  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
881  Kokkos::View<const GlobalOrdinal*,
882  Kokkos::LayoutLeft,
883  Kokkos::HostSpace,
884  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
885  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
886  indexBase, comm);
887  if (verbose) {
888  std::ostringstream os;
889  os << *prefix << "Done" << endl;
890  std::cerr << os.str();
891  }
892  }
893 
894  template <class LocalOrdinal, class GlobalOrdinal, class Node>
896  Map (const global_size_t numGlobalElements,
897  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
898  const GlobalOrdinal indexBase,
899  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
900  comm_ (comm),
901  uniform_ (false),
902  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
903  {
904  using std::endl;
905  const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
906 
907  const bool verbose = Details::Behavior::verbose("Map");
908  std::unique_ptr<std::string> prefix;
909  if (verbose) {
910  prefix = Details::createPrefix(
911  comm_.getRawPtr(), "Map", funcName);
912  std::ostringstream os;
913  os << *prefix << "Start" << endl;
914  std::cerr << os.str();
915  }
919  const size_t numLclInds = static_cast<size_t> (entryList.size ());
920  checkMapInputArray ("(GST, ArrayView, GO, comm)",
921  entryList.getRawPtr (), numLclInds,
922  comm.getRawPtr ());
923  // Not quite sure if I trust both ArrayView and View to behave
924  // correctly if the pointer is nonnull but the array length is
925  // nonzero, so I'll make sure it's null if the length is zero.
926  const GlobalOrdinal* const indsRaw =
927  numLclInds == 0 ? NULL : entryList.getRawPtr ();
928  Kokkos::View<const GlobalOrdinal*,
929  Kokkos::LayoutLeft,
930  Kokkos::HostSpace,
931  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
932  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
933  indexBase, comm);
934  if (verbose) {
935  std::ostringstream os;
936  os << *prefix << "Done" << endl;
937  std::cerr << os.str();
938  }
939  }
940 
941 
942  template <class LocalOrdinal, class GlobalOrdinal, class Node>
944  Map (const global_size_t numGlobalElements,
945  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
946  const GlobalOrdinal indexBase,
947  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
948  comm_ (comm),
949  uniform_ (false),
950  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
951  {
952  using Kokkos::LayoutLeft;
953  using Kokkos::subview;
954  using Kokkos::View;
955  using Kokkos::view_alloc;
956  using Kokkos::WithoutInitializing;
957  using Teuchos::arcp;
958  using Teuchos::ArrayView;
959  using Teuchos::as;
960  using Teuchos::broadcast;
961  using Teuchos::outArg;
962  using Teuchos::ptr;
963  using Teuchos::REDUCE_MAX;
964  using Teuchos::REDUCE_MIN;
965  using Teuchos::REDUCE_SUM;
966  using Teuchos::reduceAll;
967  using Teuchos::typeName;
968  using std::endl;
969  using LO = local_ordinal_type;
970  using GO = global_ordinal_type;
971  using GST = global_size_t;
972  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
973  const char funcName[] =
974  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
975 
976  const bool verbose = Details::Behavior::verbose("Map");
977  std::unique_ptr<std::string> prefix;
978  if (verbose) {
979  prefix = Details::createPrefix(
980  comm_.getRawPtr(), "Map", funcName);
981  std::ostringstream os;
982  os << *prefix << "Start" << endl;
983  std::cerr << os.str();
984  }
988  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
989  entryList.data (),
990  static_cast<size_t> (entryList.extent (0)),
991  comm.getRawPtr ());
992 
993  // The user has specified the distribution of indices over the
994  // processes, via the input array of global indices on each
995  // process. The distribution is not necessarily contiguous or
996  // equally shared over the processes.
997 
998  // The length of the input array on this process is the number of
999  // local indices to associate with this process, even though the
1000  // input array contains global indices. We assume that the number
1001  // of local indices on a process can be stored in a size_t;
1002  // numLocalElements_ is a size_t, so this variable and that should
1003  // have the same type.
1004  const size_t numLocalElements(entryList.size());
1005 
1006  initialNonuniformDebugCheck(funcName, numGlobalElements,
1007  numLocalElements, indexBase, comm);
1008 
1009  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
1010  // reduction is redundant, since the directory Map will have to do
1011  // the same thing. Thus, we could do the scan and broadcast for
1012  // the directory Map here, and give the computed offsets to the
1013  // directory Map's constructor. However, a reduction costs less
1014  // than a scan and broadcast, so this still saves time if users of
1015  // this Map don't ever need the Directory (i.e., if they never
1016  // call getRemoteIndexList on this Map).
1017  if (numGlobalElements != GSTI) {
1018  numGlobalElements_ = numGlobalElements; // Use the user's value.
1019  }
1020  else { // The user wants us to compute the sum.
1021  reduceAll(*comm, REDUCE_SUM,
1022  static_cast<GST>(numLocalElements),
1023  outArg(numGlobalElements_));
1024  }
1025 
1026 
1027  // mfh 20 Feb 2013: We've never quite done the right thing for
1028  // duplicate GIDs here. Duplicate GIDs have always been counted
1029  // distinctly in numLocalElements_, and thus should get a
1030  // different LID. However, we've always used std::map or a hash
1031  // table for the GID -> LID lookup table, so distinct GIDs always
1032  // map to the same LID. Furthermore, the order of the input GID
1033  // list matters, so it's not desirable to sort for determining
1034  // uniqueness.
1035  //
1036  // I've chosen for now to write this code as if the input GID list
1037  // contains no duplicates. If this is not desired, we could use
1038  // the lookup table itself to determine uniqueness: If we haven't
1039  // seen the GID before, it gets a new LID and it's added to the
1040  // LID -> GID and GID -> LID tables. If we have seen the GID
1041  // before, it doesn't get added to either table. I would
1042  // implement this, but it would cost more to do the double lookups
1043  // in the table (one to check, and one to insert).
1044  //
1045  // More importantly, since we build the GID -> LID table in (a
1046  // thread-) parallel (way), the order in which duplicate GIDs may
1047  // get inserted is not defined. This would make the assignment of
1048  // LID to GID nondeterministic.
1049 
1050  numLocalElements_ = numLocalElements;
1051  indexBase_ = indexBase;
1052 
1053  minMyGID_ = indexBase_;
1054  maxMyGID_ = indexBase_;
1055 
1056  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1057  // GID range requires looking at all the GIDs in the range,
1058  // dismissing an interval of GIDs only requires looking at the
1059  // first and last GIDs. Thus, we could do binary search backwards
1060  // from the end in order to catch the common case of a contiguous
1061  // interval followed by noncontiguous entries. On the other hand,
1062  // we could just expose this case explicitly as yet another Map
1063  // constructor, and avoid the trouble of detecting it.
1064  if (numLocalElements_ > 0) {
1065  // Find contiguous GID range, with the restriction that the
1066  // beginning of the range starts with the first entry. While
1067  // doing so, fill in the LID -> GID table.
1068  typename decltype (lgMap_)::non_const_type lgMap
1069  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1070 
1071  // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1072  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1073  Kokkos::deep_copy(typename device_type::execution_space(),lgMap,entryList);
1074  LO lastContiguousGID_loc;
1075  computeConstantsOnDevice(entryList,minMyGID_,maxMyGID_,firstContiguousGID_,lastContiguousGID_,lastContiguousGID_loc);
1076  LO firstNonContiguous_loc = lastContiguousGID_loc+1;
1077  auto nonContigGids = Kokkos::subview(entryList,std::pair<size_t,size_t>(firstNonContiguous_loc,entryList.extent(0)));
1078 
1079  // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1080  glMap_ = global_to_local_table_type(nonContigGids,
1081  firstNonContiguous_loc);
1082 
1083  // "Commit" the local-to-global lookup table we filled in above.
1084  lgMap_ = lgMap;
1085 
1086  }
1087  else {
1088  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1089  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1090  // This insures tests for GIDs in the range
1091  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1092  // with no local elements.
1093  firstContiguousGID_ = indexBase_+1;
1094  lastContiguousGID_ = indexBase_;
1095  // glMap_ was default constructed, so it's already empty.
1096  }
1097 
1098 
1099  // Compute the min and max of all processes' GIDs. If
1100  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1101  // are both indexBase_. This is wrong, but fixing it would
1102  // require either a fancy sparse all-reduce, or a custom reduction
1103  // operator that ignores invalid values ("invalid" means
1104  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1105  //
1106  // Also, while we're at it, use the same all-reduce to figure out
1107  // if the Map is distributed. "Distributed" means that there is
1108  // at least one process with a number of local elements less than
1109  // the number of global elements.
1110  //
1111  // We're computing the min and max of all processes' GIDs using a
1112  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1113  // and y are signed). (This lets us combine the min and max into
1114  // a single all-reduce.) If each process sets localDist=1 if its
1115  // number of local elements is strictly less than the number of
1116  // global elements, and localDist=0 otherwise, then a MAX
1117  // all-reduce on localDist tells us if the Map is distributed (1
1118  // if yes, 0 if no). Thus, we can append localDist onto the end
1119  // of the data and get the global result from the all-reduce.
1120  if (std::numeric_limits<GO>::is_signed) {
1121  // Does my process NOT own all the elements?
1122  const GO localDist =
1123  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1124 
1125  GO minMaxInput[3];
1126  minMaxInput[0] = -minMyGID_;
1127  minMaxInput[1] = maxMyGID_;
1128  minMaxInput[2] = localDist;
1129 
1130  GO minMaxOutput[3];
1131  minMaxOutput[0] = 0;
1132  minMaxOutput[1] = 0;
1133  minMaxOutput[2] = 0;
1134  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1135  minAllGID_ = -minMaxOutput[0];
1136  maxAllGID_ = minMaxOutput[1];
1137  const GO globalDist = minMaxOutput[2];
1138  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1139  }
1140  else { // unsigned; use two reductions
1141  // This is always correct, no matter the signedness of GO.
1142  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1143  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1144  distributed_ = checkIsDist ();
1145  }
1146 
1147  contiguous_ = false; // "Contiguous" is conservative.
1148 
1149  TEUCHOS_TEST_FOR_EXCEPTION(
1150  minAllGID_ < indexBase_,
1151  std::invalid_argument,
1152  "Tpetra::Map constructor (noncontiguous): "
1153  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1154  "less than the given indexBase = " << indexBase_ << ".");
1155 
1156  // Create the Directory on demand in getRemoteIndexList().
1157  //setupDirectory ();
1158 
1159  if (verbose) {
1160  std::ostringstream os;
1161  os << *prefix << "Done" << endl;
1162  std::cerr << os.str();
1163  }
1164  }
1165 
1166 
1167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1169  {
1170  if (! Kokkos::is_initialized ()) {
1171  std::ostringstream os;
1172  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1173  "Kokkos::finalize() has been called. This is user error! There are "
1174  "two likely causes: " << std::endl <<
1175  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1176  << std::endl <<
1177  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1178  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1179  "or Tpetra::finalize()." << std::endl << std::endl <<
1180  "Don't do either of these! Please refer to GitHib Issue #2372."
1181  << std::endl;
1182  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1183  this->getComm ().getRawPtr ());
1184  }
1185  else {
1186  using ::Tpetra::Details::mpiIsInitialized;
1187  using ::Tpetra::Details::mpiIsFinalized;
1188  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1189 
1190  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1191  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1192  mpiIsInitialized () && mpiIsFinalized ()) {
1193  // Tpetra itself does not require MPI, even if building with
1194  // MPI. It is legal to create Tpetra objects that do not use
1195  // MPI, even in an MPI program. However, calling Tpetra stuff
1196  // after MPI_Finalize() has been called is a bad idea, since
1197  // some Tpetra defaults may use MPI if available.
1198  std::ostringstream os;
1199  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1200  "MPI_Finalize() has been called. This is user error! There are "
1201  "two likely causes: " << std::endl <<
1202  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1203  << std::endl <<
1204  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1205  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1206  "Tpetra::finalize()." << std::endl << std::endl <<
1207  "Don't do either of these! Please refer to GitHib Issue #2372."
1208  << std::endl;
1209  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1210  }
1211  }
1212  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1213  // because Tpetra does not yet require Tpetra::initialize /
1214  // Tpetra::finalize.
1215  }
1216 
1217  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1218  bool
1220  {
1221  TEUCHOS_TEST_FOR_EXCEPTION(
1222  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1223  "getComm() returns null. Please report this bug to the Tpetra "
1224  "developers.");
1225 
1226  // This is a collective operation, if it hasn't been called before.
1227  setupDirectory ();
1228  return directory_->isOneToOne (*this);
1229  }
1230 
1231  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1232  LocalOrdinal
1234  getLocalElement (GlobalOrdinal globalIndex) const
1235  {
1236  if (isContiguous ()) {
1237  if (globalIndex < getMinGlobalIndex () ||
1238  globalIndex > getMaxGlobalIndex ()) {
1239  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1240  }
1241  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1242  }
1243  else if (globalIndex >= firstContiguousGID_ &&
1244  globalIndex <= lastContiguousGID_) {
1245  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1246  }
1247  else {
1248  // If the given global index is not in the table, this returns
1249  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1250  // glMapHost_ is Host and does not assume UVM
1251  lazyPushToHost();
1252  return glMapHost_.get (globalIndex);
1253  }
1254  }
1255 
1256  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1257  GlobalOrdinal
1259  getGlobalElement (LocalOrdinal localIndex) const
1260  {
1261  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1262  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1263  }
1264  if (isContiguous ()) {
1265  return getMinGlobalIndex () + localIndex;
1266  }
1267  else {
1268  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1269  // involvement. As a result, it is thread safe.
1270  //
1271  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1272  lazyPushToHost();
1273  return lgMapHost_[localIndex];
1274  }
1275  }
1276 
1277  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1278  bool
1280  isNodeLocalElement (LocalOrdinal localIndex) const
1281  {
1282  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1283  return false;
1284  } else {
1285  return true;
1286  }
1287  }
1288 
1289  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1290  bool
1292  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1293  return this->getLocalElement (globalIndex) !=
1294  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1295  }
1296 
1297  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1299  return uniform_;
1300  }
1301 
1302  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1304  return contiguous_;
1305  }
1306 
1307 
1308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1312  {
1313  return local_map_type (glMap_, lgMap_, getIndexBase (),
1314  getMinGlobalIndex (), getMaxGlobalIndex (),
1315  firstContiguousGID_, lastContiguousGID_,
1316  getLocalNumElements (), isContiguous ());
1317  }
1318 
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  bool
1323  {
1324  using Teuchos::outArg;
1325  using Teuchos::REDUCE_MIN;
1326  using Teuchos::reduceAll;
1327  //
1328  // Tests that avoid the Boolean all-reduce below by using
1329  // globally consistent quantities.
1330  //
1331  if (this == &map) {
1332  // Pointer equality on one process always implies pointer
1333  // equality on all processes, since Map is immutable.
1334  return true;
1335  }
1336  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1337  // The two communicators have different numbers of processes.
1338  // It's not correct to call isCompatible() in that case. This
1339  // may result in the all-reduce hanging below.
1340  return false;
1341  }
1342  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1343  // Two Maps are definitely NOT compatible if they have different
1344  // global numbers of indices.
1345  return false;
1346  }
1347  else if (isContiguous () && isUniform () &&
1348  map.isContiguous () && map.isUniform ()) {
1349  // Contiguous uniform Maps with the same number of processes in
1350  // their communicators, and with the same global numbers of
1351  // indices, are always compatible.
1352  return true;
1353  }
1354  else if (! isContiguous () && ! map.isContiguous () &&
1355  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1356  lgMap_.data () == map.lgMap_.data ()) {
1357  // Noncontiguous Maps whose global index lists are nonempty and
1358  // have the same pointer must be the same (and therefore
1359  // contiguous).
1360  //
1361  // Nonempty is important. For example, consider a communicator
1362  // with two processes, and two Maps that share this
1363  // communicator, with zero global indices on the first process,
1364  // and different nonzero numbers of global indices on the second
1365  // process. In that case, on the first process, the pointers
1366  // would both be NULL.
1367  return true;
1368  }
1369 
1370  TEUCHOS_TEST_FOR_EXCEPTION(
1371  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1372  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1373  "checked that this condition is true above, but it's false here. "
1374  "Please report this bug to the Tpetra developers.");
1375 
1376  // Do both Maps have the same number of indices on each process?
1377  const int locallyCompat =
1378  (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1379 
1380  int globallyCompat = 0;
1381  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1382  return (globallyCompat == 1);
1383  }
1384 
1385  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1386  bool
1389  {
1390  using Teuchos::ArrayView;
1391  using GO = global_ordinal_type;
1392  using size_type = typename ArrayView<const GO>::size_type;
1393 
1394  // If both Maps are contiguous, we can compare their GID ranges
1395  // easily by looking at the min and max GID on this process.
1396  // Otherwise, we'll compare their GID lists. If only one Map is
1397  // contiguous, then we only have to call getLocalElementList() on
1398  // the noncontiguous Map. (It's best to avoid calling it on a
1399  // contiguous Map, since it results in unnecessary storage that
1400  // persists for the lifetime of the Map.)
1401 
1402  if (this == &map) {
1403  // Pointer equality on one process always implies pointer
1404  // equality on all processes, since Map is immutable.
1405  return true;
1406  }
1407  else if (getLocalNumElements () != map.getLocalNumElements ()) {
1408  return false;
1409  }
1410  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1411  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1412  return false;
1413  }
1414  else {
1415  if (isContiguous ()) {
1416  if (map.isContiguous ()) {
1417  return true; // min and max match, so the ranges match.
1418  }
1419  else { // *this is contiguous, but map is not contiguous
1420  TEUCHOS_TEST_FOR_EXCEPTION(
1421  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1422  "Tpetra::Map::locallySameAs: BUG");
1423  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1424  const GO minLhsGid = this->getMinGlobalIndex ();
1425  const size_type numRhsElts = rhsElts.size ();
1426  for (size_type k = 0; k < numRhsElts; ++k) {
1427  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1428  if (curLhsGid != rhsElts[k]) {
1429  return false; // stop on first mismatch
1430  }
1431  }
1432  return true;
1433  }
1434  }
1435  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1436  TEUCHOS_TEST_FOR_EXCEPTION(
1437  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1438  "Tpetra::Map::locallySameAs: BUG");
1439  ArrayView<const GO> lhsElts = this->getLocalElementList ();
1440  const GO minRhsGid = map.getMinGlobalIndex ();
1441  const size_type numLhsElts = lhsElts.size ();
1442  for (size_type k = 0; k < numLhsElts; ++k) {
1443  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1444  if (curRhsGid != lhsElts[k]) {
1445  return false; // stop on first mismatch
1446  }
1447  }
1448  return true;
1449  }
1450  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1451  // Pointers to LID->GID "map" (actually just an array) are the
1452  // same, and the number of GIDs are the same.
1453  return this->getLocalNumElements () == map.getLocalNumElements ();
1454  }
1455  else { // we actually have to compare the GIDs
1456  if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1457  return false; // We already checked above, but check just in case
1458  }
1459  else {
1460  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename node_type::execution_space>;
1461 
1462  auto lhsLclMap = getLocalMap();
1463  auto rhsLclMap = map.getLocalMap();
1464 
1465  LocalOrdinal numMismatchedElements = 0;
1466  Kokkos::parallel_reduce("Tpetra::Map::locallySameAs",
1467  range_type(0, this->getLocalNumElements()),
1468  KOKKOS_LAMBDA(const LocalOrdinal lid, LocalOrdinal& numMismatches) {
1469  if (lhsLclMap.getGlobalElement(lid) != rhsLclMap.getGlobalElement(lid))
1470  ++numMismatches;
1471  }, numMismatchedElements);
1472 
1473  return (numMismatchedElements == 0);
1474  }
1475  }
1476  }
1477  }
1478 
1479  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1480  bool
1483  {
1484  if (this == &map)
1485  return true;
1486 
1487  // We are going to check if lmap1 is fitted into lmap2:
1488  // Is lmap1 (map) a subset of lmap2 (this)?
1489  // And do the first lmap1.getLocalNumElements() global elements
1490  // of lmap1,lmap2 owned on each process exactly match?
1491  auto lmap1 = map.getLocalMap();
1492  auto lmap2 = this->getLocalMap();
1493 
1494  auto numLocalElements1 = lmap1.getLocalNumElements();
1495  auto numLocalElements2 = lmap2.getLocalNumElements();
1496 
1497  if (numLocalElements1 > numLocalElements2) {
1498  // There are more indices in the first map on this process than in second map.
1499  return false;
1500  }
1501 
1502  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1503  // When both Maps are contiguous, just check the interval inclusion.
1504  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1505  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1506  }
1507 
1508  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1509  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1510  // The second map does not include the first map bounds, and thus some of
1511  // the first map global indices are not in the second map.
1512  return false;
1513  }
1514 
1515  using LO = local_ordinal_type;
1516  using range_type =
1517  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1518 
1519  // Check all elements.
1520  LO numDiff = 0;
1521  Kokkos::parallel_reduce(
1522  "isLocallyFitted",
1523  range_type(0, numLocalElements1),
1524  KOKKOS_LAMBDA (const LO i, LO& diff) {
1525  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1526  }, numDiff);
1527 
1528  return (numDiff == 0);
1529  }
1530 
1531  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1532  bool
1535  {
1536  using Teuchos::outArg;
1537  using Teuchos::REDUCE_MIN;
1538  using Teuchos::reduceAll;
1539  //
1540  // Tests that avoid the Boolean all-reduce below by using
1541  // globally consistent quantities.
1542  //
1543  if (this == &map) {
1544  // Pointer equality on one process always implies pointer
1545  // equality on all processes, since Map is immutable.
1546  return true;
1547  }
1548  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1549  // The two communicators have different numbers of processes.
1550  // It's not correct to call isSameAs() in that case. This
1551  // may result in the all-reduce hanging below.
1552  return false;
1553  }
1554  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1555  // Two Maps are definitely NOT the same if they have different
1556  // global numbers of indices.
1557  return false;
1558  }
1559  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1560  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1561  getIndexBase () != map.getIndexBase ()) {
1562  // If the global min or max global index doesn't match, or if
1563  // the index base doesn't match, then the Maps aren't the same.
1564  return false;
1565  }
1566  else if (isDistributed () != map.isDistributed ()) {
1567  // One Map is distributed and the other is not, which means that
1568  // the Maps aren't the same.
1569  return false;
1570  }
1571  else if (isContiguous () && isUniform () &&
1572  map.isContiguous () && map.isUniform ()) {
1573  // Contiguous uniform Maps with the same number of processes in
1574  // their communicators, with the same global numbers of indices,
1575  // and with matching index bases and ranges, must be the same.
1576  return true;
1577  }
1578 
1579  // The two communicators must have the same number of processes,
1580  // with process ranks occurring in the same order. This uses
1581  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1582  // "Operations that access communicators are local and their
1583  // execution does not require interprocess communication."
1584  // However, just to be sure, I'll put this call after the above
1585  // tests that don't communicate.
1586  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1587  return false;
1588  }
1589 
1590  // If we get this far, we need to check local properties and then
1591  // communicate local sameness across all processes.
1592  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1593 
1594  // Return true if and only if all processes report local sameness.
1595  int isSame_gbl = 0;
1596  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1597  return isSame_gbl == 1;
1598  }
1599 
1600  namespace { // (anonymous)
1601  template <class LO, class GO, class DT>
1602  class FillLgMap {
1603  public:
1604  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1605  const GO startGid) :
1606  lgMap_ (lgMap), startGid_ (startGid)
1607  {
1608  Kokkos::RangePolicy<LO, typename DT::execution_space>
1609  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1610  Kokkos::parallel_for (range, *this);
1611  }
1612 
1613  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1614  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1615  }
1616 
1617  private:
1618  const Kokkos::View<GO*, DT> lgMap_;
1619  const GO startGid_;
1620  };
1621 
1622  } // namespace (anonymous)
1623 
1624 
1625  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1626  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1628  {
1629  using std::endl;
1630  using LO = local_ordinal_type;
1631  using GO = global_ordinal_type;
1632  using const_lg_view_type = decltype(lgMap_);
1633  using lg_view_type = typename const_lg_view_type::non_const_type;
1634  const bool debug = Details::Behavior::debug("Map");
1635  const bool verbose = Details::Behavior::verbose("Map");
1636 
1637  std::unique_ptr<std::string> prefix;
1638  if (verbose) {
1639  prefix = Details::createPrefix(
1640  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1641  std::ostringstream os;
1642  os << *prefix << "Start" << endl;
1643  std::cerr << os.str();
1644  }
1645 
1646  // If the local-to-global mapping doesn't exist yet, and if we
1647  // have local entries, then create and fill the local-to-global
1648  // mapping.
1649  const bool needToCreateLocalToGlobalMapping =
1650  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1651 
1652  if (needToCreateLocalToGlobalMapping) {
1653  if (verbose) {
1654  std::ostringstream os;
1655  os << *prefix << "Need to create lgMap" << endl;
1656  std::cerr << os.str();
1657  }
1658  if (debug) {
1659  // The local-to-global mapping should have been set up already
1660  // for a noncontiguous map.
1661  TEUCHOS_TEST_FOR_EXCEPTION
1662  (! isContiguous(), std::logic_error,
1663  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1664  "mapping (lgMap_) should have been set up already for a "
1665  "noncontiguous Map. Please report this bug to the Tpetra "
1666  "developers.");
1667  }
1668  const LO numElts = static_cast<LO> (getLocalNumElements ());
1669 
1670  using Kokkos::view_alloc;
1671  using Kokkos::WithoutInitializing;
1672  lg_view_type lgMap ("lgMap", numElts);
1673  if (verbose) {
1674  std::ostringstream os;
1675  os << *prefix << "Fill lgMap" << endl;
1676  std::cerr << os.str();
1677  }
1678  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1679 
1680  if (verbose) {
1681  std::ostringstream os;
1682  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1683  std::cerr << os.str();
1684  }
1685 
1686  auto lgMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1687  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1688  auto exec_instance = execution_space();
1689  Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1690 
1691  // There's a non-trivial chance we'll grab this on the host,
1692  // so let's make sure the copy finishes
1693  exec_instance.fence();
1694 
1695  // "Commit" the local-to-global lookup table we filled in above.
1696  lgMap_ = lgMap;
1697  lgMapHost_ = lgMapHost;
1698  }
1699  else {
1700  lazyPushToHost();
1701  }
1702 
1703  if (verbose) {
1704  std::ostringstream os;
1705  os << *prefix << "Done" << endl;
1706  std::cerr << os.str();
1707  }
1708  return lgMapHost_;
1709  }
1710 
1711 
1712  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1713  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_device_type
1715  {
1716  using std::endl;
1717  using LO = local_ordinal_type;
1718  using GO = global_ordinal_type;
1719  using const_lg_view_type = decltype(lgMap_);
1720  using lg_view_type = typename const_lg_view_type::non_const_type;
1721  const bool debug = Details::Behavior::debug("Map");
1722  const bool verbose = Details::Behavior::verbose("Map");
1723 
1724  std::unique_ptr<std::string> prefix;
1725  if (verbose) {
1726  prefix = Details::createPrefix(
1727  comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1728  std::ostringstream os;
1729  os << *prefix << "Start" << endl;
1730  std::cerr << os.str();
1731  }
1732 
1733  // If the local-to-global mapping doesn't exist yet, and if we
1734  // have local entries, then create and fill the local-to-global
1735  // mapping.
1736  const bool needToCreateLocalToGlobalMapping =
1737  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1738 
1739  if (needToCreateLocalToGlobalMapping) {
1740  if (verbose) {
1741  std::ostringstream os;
1742  os << *prefix << "Need to create lgMap" << endl;
1743  std::cerr << os.str();
1744  }
1745  if (debug) {
1746  // The local-to-global mapping should have been set up already
1747  // for a noncontiguous map.
1748  TEUCHOS_TEST_FOR_EXCEPTION
1749  (! isContiguous(), std::logic_error,
1750  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1751  "mapping (lgMap_) should have been set up already for a "
1752  "noncontiguous Map. Please report this bug to the Tpetra "
1753  "developers.");
1754  }
1755  const LO numElts = static_cast<LO> (getLocalNumElements ());
1756 
1757  using Kokkos::view_alloc;
1758  using Kokkos::WithoutInitializing;
1759  lg_view_type lgMap ("lgMap", numElts);
1760  if (verbose) {
1761  std::ostringstream os;
1762  os << *prefix << "Fill lgMap" << endl;
1763  std::cerr << os.str();
1764  }
1765  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1766 
1767  // "Commit" the local-to-global lookup table we filled in above.
1768  lgMap_ = lgMap;
1769  }
1770 
1771  if (verbose) {
1772  std::ostringstream os;
1773  os << *prefix << "Done" << endl;
1774  std::cerr << os.str();
1775  }
1776  return lgMap_;
1777  }
1778 
1779 
1780 
1781  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1782  Teuchos::ArrayView<const GlobalOrdinal>
1784  {
1785  using GO = global_ordinal_type;
1786 
1787  // If the local-to-global mapping doesn't exist yet, and if we
1788  // have local entries, then create and fill the local-to-global
1789  // mapping.
1790  (void) this->getMyGlobalIndices ();
1791 
1792  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1793  lazyPushToHost();
1794  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1795  // The third argument forces ArrayView not to try to track memory
1796  // in a debug build. We have to use it because the memory does
1797  // not belong to a Teuchos memory management class.
1798  return Teuchos::ArrayView<const GO>(
1799  lgMapHostRawPtr,
1800  lgMapHost_.extent (0),
1801  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1802  }
1803 
1804  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1806  return distributed_;
1807  }
1808 
1809  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1811  using Teuchos::TypeNameTraits;
1812  std::ostringstream os;
1813 
1814  os << "Tpetra::Map: {"
1815  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1816  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1817  << ", NodeType: " << TypeNameTraits<Node>::name ();
1818  if (this->getObjectLabel () != "") {
1819  os << ", Label: \"" << this->getObjectLabel () << "\"";
1820  }
1821  os << ", Global number of entries: " << getGlobalNumElements ()
1822  << ", Number of processes: " << getComm ()->getSize ()
1823  << ", Uniform: " << (isUniform () ? "true" : "false")
1824  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1825  << ", Distributed: " << (isDistributed () ? "true" : "false")
1826  << "}";
1827  return os.str ();
1828  }
1829 
1834  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1835  std::string
1837  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1838  {
1839  using LO = local_ordinal_type;
1840  using std::endl;
1841 
1842  // This preserves current behavior of Map.
1843  if (vl < Teuchos::VERB_HIGH) {
1844  return std::string ();
1845  }
1846  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1847  Teuchos::RCP<Teuchos::FancyOStream> outp =
1848  Teuchos::getFancyOStream (outStringP);
1849  Teuchos::FancyOStream& out = *outp;
1850 
1851  auto comm = this->getComm ();
1852  const int myRank = comm->getRank ();
1853  const int numProcs = comm->getSize ();
1854  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1855  Teuchos::OSTab tab1 (out);
1856 
1857  const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1858  out << "My number of entries: " << numEnt << endl
1859  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1860  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1861 
1862  if (vl == Teuchos::VERB_EXTREME) {
1863  out << "My global indices: [";
1864  const LO minLclInd = this->getMinLocalIndex ();
1865  for (LO k = 0; k < numEnt; ++k) {
1866  out << minLclInd + this->getGlobalElement (k);
1867  if (k + 1 < numEnt) {
1868  out << ", ";
1869  }
1870  }
1871  out << "]" << endl;
1872  }
1873 
1874  out.flush (); // make sure the ostringstream got everything
1875  return outStringP->str ();
1876  }
1877 
1878  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1879  void
1881  describe (Teuchos::FancyOStream &out,
1882  const Teuchos::EVerbosityLevel verbLevel) const
1883  {
1884  using Teuchos::TypeNameTraits;
1885  using Teuchos::VERB_DEFAULT;
1886  using Teuchos::VERB_NONE;
1887  using Teuchos::VERB_LOW;
1888  using Teuchos::VERB_HIGH;
1889  using std::endl;
1890  using LO = local_ordinal_type;
1891  using GO = global_ordinal_type;
1892  const Teuchos::EVerbosityLevel vl =
1893  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1894 
1895  if (vl == VERB_NONE) {
1896  return; // don't print anything
1897  }
1898  // If this Map's Comm is null, then the Map does not participate
1899  // in collective operations with the other processes. In that
1900  // case, it is not even legal to call this method. The reasonable
1901  // thing to do in that case is nothing.
1902  auto comm = this->getComm ();
1903  if (comm.is_null ()) {
1904  return;
1905  }
1906  const int myRank = comm->getRank ();
1907  const int numProcs = comm->getSize ();
1908 
1909  // Only Process 0 should touch the output stream, but this method
1910  // in general may need to do communication. Thus, we may need to
1911  // preserve the current tab level across multiple "if (myRank ==
1912  // 0) { ... }" inner scopes. This is why we sometimes create
1913  // OSTab instances by pointer, instead of by value. We only need
1914  // to create them by pointer if the tab level must persist through
1915  // multiple inner scopes.
1916  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1917 
1918  if (myRank == 0) {
1919  // At every verbosity level but VERB_NONE, Process 0 prints.
1920  // By convention, describe() always begins with a tab before
1921  // printing.
1922  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1923  out << "\"Tpetra::Map\":" << endl;
1924  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1925  {
1926  out << "Template parameters:" << endl;
1927  Teuchos::OSTab tab2 (out);
1928  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1929  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1930  << "Node: " << TypeNameTraits<Node>::name () << endl;
1931  }
1932  const std::string label = this->getObjectLabel ();
1933  if (label != "") {
1934  out << "Label: \"" << label << "\"" << endl;
1935  }
1936  out << "Global number of entries: " << getGlobalNumElements () << endl
1937  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1938  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1939  << "Index base: " << getIndexBase () << endl
1940  << "Number of processes: " << numProcs << endl
1941  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1942  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1943  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1944  }
1945 
1946  // This is collective over the Map's communicator.
1947  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1948  const std::string lclStr = this->localDescribeToString (vl);
1949  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1950  }
1951  }
1952 
1953  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1954  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1956  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1957  {
1958  using Teuchos::RCP;
1959  using Teuchos::rcp;
1960  using GST = global_size_t;
1961  using LO = local_ordinal_type;
1962  using GO = global_ordinal_type;
1963  using map_type = Map<LO, GO, Node>;
1964 
1965  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1966  // the Map by calling its ordinary public constructor, using the
1967  // original Map's data. This only involves O(1) all-reduces over
1968  // the new communicator, which in the common case only includes a
1969  // small number of processes.
1970 
1971  // Create the Map to return.
1972  if (newComm.is_null () || newComm->getSize () < 1) {
1973  return Teuchos::null; // my process does not participate in the new Map
1974  }
1975  else if (newComm->getSize () == 1) {
1976  lazyPushToHost();
1977 
1978  // The case where the new communicator has only one process is
1979  // easy. We don't have to communicate to get all the
1980  // information we need. Use the default comm to create the new
1981  // Map, then fill in all the fields directly.
1982  RCP<map_type> newMap (new map_type ());
1983 
1984  newMap->comm_ = newComm;
1985  // mfh 07 Oct 2016: Preserve original behavior, even though the
1986  // original index base may no longer be the globally min global
1987  // index. See #616 for why this doesn't matter so much anymore.
1988  newMap->indexBase_ = this->indexBase_;
1989  newMap->numGlobalElements_ = this->numLocalElements_;
1990  newMap->numLocalElements_ = this->numLocalElements_;
1991  newMap->minMyGID_ = this->minMyGID_;
1992  newMap->maxMyGID_ = this->maxMyGID_;
1993  newMap->minAllGID_ = this->minMyGID_;
1994  newMap->maxAllGID_ = this->maxMyGID_;
1995  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1996  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1997  // Since the new communicator has only one process, neither
1998  // uniformity nor contiguity have changed.
1999  newMap->uniform_ = this->uniform_;
2000  newMap->contiguous_ = this->contiguous_;
2001  // The new communicator only has one process, so the new Map is
2002  // not distributed.
2003  newMap->distributed_ = false;
2004  newMap->lgMap_ = this->lgMap_;
2005  newMap->lgMapHost_ = this->lgMapHost_;
2006  newMap->glMap_ = this->glMap_;
2007  newMap->glMapHost_ = this->glMapHost_;
2008  // It's OK not to initialize the new Map's Directory.
2009  // This is initialized lazily, on first call to getRemoteIndexList.
2010 
2011  return newMap;
2012  }
2013  else { // newComm->getSize() != 1
2014  // Even if the original Map is contiguous, the new Map might not
2015  // be, especially if the excluded processes have ranks != 0 or
2016  // newComm->getSize()-1. The common case for this method is to
2017  // exclude many (possibly even all but one) processes, so it
2018  // likely doesn't pay to do the global communication (over the
2019  // original communicator) to figure out whether we can optimize
2020  // the result Map. Thus, we just set up the result Map as
2021  // noncontiguous.
2022  //
2023  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
2024  // the global-to-local table, etc. Optimize this code path to
2025  // avoid unnecessary local work.
2026 
2027  // Make Map (re)compute the global number of elements.
2028  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2029  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
2030  // to use the noncontiguous Map constructor, since the new Map
2031  // might not be contiguous. Even if the old Map was contiguous,
2032  // some process in the "middle" might have been excluded. If we
2033  // want to avoid local work, we either have to do the setup by
2034  // hand, or write a new Map constructor.
2035 #if 1
2036  // The disabled code here throws the following exception in
2037  // Map's replaceCommWithSubset test:
2038  //
2039  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
2040  // 10:
2041  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2042  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2043 
2044  auto lgMap = this->getMyGlobalIndices ();
2045  using size_type =
2046  typename std::decay<decltype (lgMap.extent (0)) >::type;
2047  const size_type lclNumInds =
2048  static_cast<size_type> (this->getLocalNumElements ());
2049  using Teuchos::TypeNameTraits;
2050  TEUCHOS_TEST_FOR_EXCEPTION
2051  (lgMap.extent (0) != lclNumInds, std::logic_error,
2052  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2053  "has length " << lgMap.extent (0) << " (of type " <<
2054  TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2055  " = " << this->getLocalNumElements () << ". The latter, upon being "
2056  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2057  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2058  "developers.");
2059 #else
2060  Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2061 #endif // 1
2062 
2063  const GO indexBase = this->getIndexBase ();
2064  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2065  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2066  return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2067  }
2068  }
2069 
2070  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2071  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2074  {
2075  using Teuchos::Comm;
2076  using Teuchos::null;
2077  using Teuchos::outArg;
2078  using Teuchos::RCP;
2079  using Teuchos::rcp;
2080  using Teuchos::REDUCE_MIN;
2081  using Teuchos::reduceAll;
2082 
2083  // Create the new communicator. split() returns a valid
2084  // communicator on all processes. On processes where color == 0,
2085  // ignore the result. Passing key == 0 tells MPI to order the
2086  // processes in the new communicator by their rank in the old
2087  // communicator.
2088  const int color = (numLocalElements_ == 0) ? 0 : 1;
2089  // MPI_Comm_split must be called collectively over the original
2090  // communicator. We can't just call it on processes with color
2091  // one, even though we will ignore its result on processes with
2092  // color zero.
2093  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2094  if (color == 0) {
2095  newComm = null;
2096  }
2097 
2098  // Create the Map to return.
2099  if (newComm.is_null ()) {
2100  return null; // my process does not participate in the new Map
2101  } else {
2102  RCP<Map> map = rcp (new Map ());
2103 
2104  map->comm_ = newComm;
2105  map->indexBase_ = indexBase_;
2106  map->numGlobalElements_ = numGlobalElements_;
2107  map->numLocalElements_ = numLocalElements_;
2108  map->minMyGID_ = minMyGID_;
2109  map->maxMyGID_ = maxMyGID_;
2110  map->minAllGID_ = minAllGID_;
2111  map->maxAllGID_ = maxAllGID_;
2112  map->firstContiguousGID_= firstContiguousGID_;
2113  map->lastContiguousGID_ = lastContiguousGID_;
2114 
2115  // Uniformity and contiguity have not changed. The directory
2116  // has changed, but we've taken care of that above.
2117  map->uniform_ = uniform_;
2118  map->contiguous_ = contiguous_;
2119 
2120  // If the original Map was NOT distributed, then the new Map
2121  // cannot be distributed.
2122  //
2123  // If the number of processes in the new communicator is 1, then
2124  // the new Map is not distributed.
2125  //
2126  // Otherwise, we have to check the new Map using an all-reduce
2127  // (over the new communicator). For example, the original Map
2128  // may have had some processes with zero elements, and all other
2129  // processes with the same number of elements as in the whole
2130  // Map. That Map is technically distributed, because of the
2131  // processes with zero elements. Removing those processes would
2132  // make the new Map locally replicated.
2133  if (! distributed_ || newComm->getSize () == 1) {
2134  map->distributed_ = false;
2135  } else {
2136  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2137  int allProcsOwnAllGids = 0;
2138  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2139  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2140  }
2141 
2142  map->lgMap_ = lgMap_;
2143  map->lgMapHost_ = lgMapHost_;
2144  map->glMap_ = glMap_;
2145  map->glMapHost_ = glMapHost_;
2146 
2147  // Map's default constructor creates an uninitialized Directory.
2148  // The Directory will be initialized on demand in
2149  // getRemoteIndexList().
2150  //
2151  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2152  // directory more efficiently than just recreating it. If
2153  // directory recreation proves a bottleneck, we can always
2154  // revisit this. On the other hand, Directory creation is only
2155  // collective over the new, presumably much smaller
2156  // communicator, so it may not be worth the effort to optimize.
2157 
2158  return map;
2159  }
2160  }
2161 
2162  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2163  void
2165  {
2166  TEUCHOS_TEST_FOR_EXCEPTION(
2167  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2168  "The Directory is null. "
2169  "Please report this bug to the Tpetra developers.");
2170 
2171  // Only create the Directory if it hasn't been created yet.
2172  // This is a collective operation.
2173  if (! directory_->initialized ()) {
2174  directory_->initialize (*this);
2175  }
2176  }
2177 
2178  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2179  LookupStatus
2181  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2182  const Teuchos::ArrayView<int>& PIDs,
2183  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2184  {
2185  using Tpetra::Details::OrdinalTraits;
2187  using std::endl;
2188  using size_type = Teuchos::ArrayView<int>::size_type;
2189 
2190  const bool verbose = Details::Behavior::verbose("Map");
2191  const size_t maxNumToPrint = verbose ?
2193  std::unique_ptr<std::string> prefix;
2194  if (verbose) {
2195  prefix = Details::createPrefix(comm_.getRawPtr(),
2196  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2197  std::ostringstream os;
2198  os << *prefix << "Start: ";
2199  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2200  os << endl;
2201  std::cerr << os.str();
2202  }
2203 
2204  // Empty Maps (i.e., containing no indices on any processes in the
2205  // Map's communicator) are perfectly valid. In that case, if the
2206  // input GID list is nonempty, we fill the output arrays with
2207  // invalid values, and return IDNotPresent to notify the caller.
2208  // It's perfectly valid to give getRemoteIndexList GIDs that the
2209  // Map doesn't own. SubmapImport test 2 needs this functionality.
2210  if (getGlobalNumElements () == 0) {
2211  if (GIDs.size () == 0) {
2212  if (verbose) {
2213  std::ostringstream os;
2214  os << *prefix << "Done; both Map & input are empty" << endl;
2215  std::cerr << os.str();
2216  }
2217  return AllIDsPresent; // trivially
2218  }
2219  else {
2220  if (verbose) {
2221  std::ostringstream os;
2222  os << *prefix << "Done: Map is empty on all processes, "
2223  "so all output PIDs & LIDs are invalid (-1)." << endl;
2224  std::cerr << os.str();
2225  }
2226  for (size_type k = 0; k < PIDs.size (); ++k) {
2227  PIDs[k] = OrdinalTraits<int>::invalid ();
2228  }
2229  for (size_type k = 0; k < LIDs.size (); ++k) {
2230  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2231  }
2232  return IDNotPresent;
2233  }
2234  }
2235 
2236  // getRemoteIndexList must be called collectively, and Directory
2237  // initialization is collective too, so it's OK to initialize the
2238  // Directory on demand.
2239 
2240  if (verbose) {
2241  std::ostringstream os;
2242  os << *prefix << "Call setupDirectory" << endl;
2243  std::cerr << os.str();
2244  }
2245  setupDirectory();
2246  if (verbose) {
2247  std::ostringstream os;
2248  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2249  std::cerr << os.str();
2250  }
2251  const Tpetra::LookupStatus retVal =
2252  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2253  if (verbose) {
2254  std::ostringstream os;
2255  os << *prefix << "Done; getDirectoryEntries returned "
2256  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2257  << "; ";
2258  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2259  os << ", ";
2260  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2261  os << endl;
2262  std::cerr << os.str();
2263  }
2264  return retVal;
2265  }
2266 
2267  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2268  LookupStatus
2270  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2271  const Teuchos::ArrayView<int> & PIDs) const
2272  {
2274  using std::endl;
2275 
2276  const bool verbose = Details::Behavior::verbose("Map");
2277  const size_t maxNumToPrint = verbose ?
2279  std::unique_ptr<std::string> prefix;
2280  if (verbose) {
2281  prefix = Details::createPrefix(comm_.getRawPtr(),
2282  "Map", "getRemoteIndexList(GIDs,PIDs)");
2283  std::ostringstream os;
2284  os << *prefix << "Start: ";
2285  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2286  os << endl;
2287  std::cerr << os.str();
2288  }
2289 
2290  if (getGlobalNumElements () == 0) {
2291  if (GIDs.size () == 0) {
2292  if (verbose) {
2293  std::ostringstream os;
2294  os << *prefix << "Done; both Map & input are empty" << endl;
2295  std::cerr << os.str();
2296  }
2297  return AllIDsPresent; // trivially
2298  }
2299  else {
2300  if (verbose) {
2301  std::ostringstream os;
2302  os << *prefix << "Done: Map is empty on all processes, "
2303  "so all output PIDs are invalid (-1)." << endl;
2304  std::cerr << os.str();
2305  }
2306  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2307  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2308  }
2309  return IDNotPresent;
2310  }
2311  }
2312 
2313  // getRemoteIndexList must be called collectively, and Directory
2314  // initialization is collective too, so it's OK to initialize the
2315  // Directory on demand.
2316 
2317  if (verbose) {
2318  std::ostringstream os;
2319  os << *prefix << "Call setupDirectory" << endl;
2320  std::cerr << os.str();
2321  }
2322  setupDirectory();
2323  if (verbose) {
2324  std::ostringstream os;
2325  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2326  std::cerr << os.str();
2327  }
2328  const Tpetra::LookupStatus retVal =
2329  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2330  if (verbose) {
2331  std::ostringstream os;
2332  os << *prefix << "Done; getDirectoryEntries returned "
2333  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2334  << "; ";
2335  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2336  os << endl;
2337  std::cerr << os.str();
2338  }
2339  return retVal;
2340  }
2341 
2342  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2343  void
2345  using exec_space = typename Node::device_type::execution_space;
2346  if(lgMap_.extent(0) != lgMapHost_.extent(0)) {
2347  Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2348  // NOTE: We check lgMap_ and not glMap_, since the latter can
2349  // be somewhat error prone for contiguous maps
2350 
2351  // create_mirror_view preserves const-ness. create_mirror does not
2352  auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace (), lgMap_);
2353 
2354  // Since this was computed on the default stream, we can copy on the stream and then fence
2355  // the stream
2356  Kokkos::deep_copy(exec_space(),lgMap_host,lgMap_);
2357  exec_space().fence();
2358  lgMapHost_ = lgMap_host;
2359 
2360  // Make host version - when memory spaces match these just do trivial assignment
2361  glMapHost_ = global_to_local_table_host_type(glMap_);
2362  }
2363  }
2364 
2365 
2366  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2367  Teuchos::RCP<const Teuchos::Comm<int> >
2369  return comm_;
2370  }
2371 
2372  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2373  bool
2375  checkIsDist() const
2376  {
2377  using Teuchos::as;
2378  using Teuchos::outArg;
2379  using Teuchos::REDUCE_MIN;
2380  using Teuchos::reduceAll;
2381  using std::endl;
2382 
2383  const bool verbose = Details::Behavior::verbose("Map");
2384  std::unique_ptr<std::string> prefix;
2385  if (verbose) {
2386  prefix = Details::createPrefix(
2387  comm_.getRawPtr(), "Map", "checkIsDist");
2388  std::ostringstream os;
2389  os << *prefix << "Start" << endl;
2390  std::cerr << os.str();
2391  }
2392 
2393  bool global = false;
2394  if (comm_->getSize () > 1) {
2395  // The communicator has more than one process, but that doesn't
2396  // necessarily mean the Map is distributed.
2397  int localRep = 0;
2398  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2399  // The number of local elements on this process equals the
2400  // number of global elements.
2401  //
2402  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2403  // duplicates in the global ID list on input (the third Map
2404  // constructor), so that the number of local elements (which
2405  // are not duplicated) on this process could be less than the
2406  // number of global elements, even if this process owns all
2407  // the elements?
2408  localRep = 1;
2409  }
2410  int allLocalRep;
2411  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2412  if (allLocalRep != 1) {
2413  // At least one process does not own all the elements.
2414  // This makes the Map a distributed Map.
2415  global = true;
2416  }
2417  }
2418  // If the communicator has only one process, then the Map is not
2419  // distributed.
2420 
2421  if (verbose) {
2422  std::ostringstream os;
2423  os << *prefix << "Done; global=" << (global ? "true" : "false")
2424  << endl;
2425  std::cerr << os.str();
2426  }
2427  return global;
2428  }
2429 
2430 } // namespace Tpetra
2431 
2432 template <class LocalOrdinal, class GlobalOrdinal>
2433 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2434 Tpetra::createLocalMap (const size_t numElements,
2435  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2436 {
2437  typedef LocalOrdinal LO;
2438  typedef GlobalOrdinal GO;
2439  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2440  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2441 }
2442 
2443 template <class LocalOrdinal, class GlobalOrdinal>
2444 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2445 Tpetra::createUniformContigMap (const global_size_t numElements,
2446  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2447 {
2448  typedef LocalOrdinal LO;
2449  typedef GlobalOrdinal GO;
2450  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2451  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2452 }
2453 
2454 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2455 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2456 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2457  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2458 )
2459 {
2460  using Teuchos::rcp;
2462  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2463 
2464  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2465 }
2466 
2467 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2468 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2469 Tpetra::createLocalMapWithNode (const size_t numElements,
2470  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2471 )
2472 {
2473  using Tpetra::global_size_t;
2474  using Teuchos::rcp;
2476  const GlobalOrdinal indexBase = 0;
2477  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2478 
2479  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2480 }
2481 
2482 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2483 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2485  const size_t localNumElements,
2486  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2487 )
2488 {
2489  using Teuchos::rcp;
2491  const GlobalOrdinal indexBase = 0;
2492 
2493  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2494 }
2495 
2496 template <class LocalOrdinal, class GlobalOrdinal>
2497 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2499  const size_t localNumElements,
2500  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2501 {
2502  typedef LocalOrdinal LO;
2503  typedef GlobalOrdinal GO;
2504  using NT = typename Tpetra::Map<LO, GO>::node_type;
2505 
2506  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2507 }
2508 
2509 template <class LocalOrdinal, class GlobalOrdinal>
2510 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2511 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2512  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2513 {
2514  typedef LocalOrdinal LO;
2515  typedef GlobalOrdinal GO;
2516  using NT = typename Tpetra::Map<LO, GO>::node_type;
2517 
2518  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2519 }
2520 
2521 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2522 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2523 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2524  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2525 )
2526 {
2527  using Teuchos::rcp;
2529  using GST = Tpetra::global_size_t;
2530  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2531  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2532  // shouldn't be zero, given that the index base is supposed to equal
2533  // the globally min global index?
2534  const GlobalOrdinal indexBase = 0;
2535 
2536  return rcp (new map_type (INV, elementList, indexBase, comm));
2537 }
2538 
2539 template<class LO, class GO, class NT>
2540 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2541 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2542 {
2544  using Teuchos::Array;
2545  using Teuchos::ArrayView;
2546  using Teuchos::as;
2547  using Teuchos::rcp;
2548  using std::cerr;
2549  using std::endl;
2550  using map_type = Tpetra::Map<LO, GO, NT>;
2551  using GST = global_size_t;
2552 
2553  const bool verbose = Details::Behavior::verbose("Map");
2554  std::unique_ptr<std::string> prefix;
2555  if (verbose) {
2556  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2557  prefix = Details::createPrefix(
2558  comm.getRawPtr(), "createOneToOne(Map)");
2559  std::ostringstream os;
2560  os << *prefix << "Start" << endl;
2561  cerr << os.str();
2562  }
2563  const size_t maxNumToPrint = verbose ?
2564  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2565  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2566  const int myRank = M->getComm ()->getRank ();
2567 
2568  // Bypasses for special cases where either M is known to be
2569  // one-to-one, or the one-to-one version of M is easy to compute.
2570  // This is why we take M as an RCP, not as a const reference -- so
2571  // that we can return M itself if it is 1-to-1.
2572  if (! M->isDistributed ()) {
2573  // For a locally replicated Map, we assume that users want to push
2574  // all the GIDs to Process 0.
2575 
2576  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2577  // you think it should return, in this special case of a locally
2578  // replicated contiguous Map.
2579  const GST numGlobalEntries = M->getGlobalNumElements ();
2580  if (M->isContiguous()) {
2581  const size_t numLocalEntries =
2582  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2583  if (verbose) {
2584  std::ostringstream os;
2585  os << *prefix << "Input is locally replicated & contiguous; "
2586  "numLocalEntries=" << numLocalEntries << endl;
2587  cerr << os.str ();
2588  }
2589  auto retMap =
2590  rcp(new map_type(numGlobalEntries, numLocalEntries,
2591  M->getIndexBase(), M->getComm()));
2592  if (verbose) {
2593  std::ostringstream os;
2594  os << *prefix << "Done" << endl;
2595  cerr << os.str ();
2596  }
2597  return retMap;
2598  }
2599  else {
2600  if (verbose) {
2601  std::ostringstream os;
2602  os << *prefix << "Input is locally replicated & noncontiguous"
2603  << endl;
2604  cerr << os.str ();
2605  }
2606  ArrayView<const GO> myGids =
2607  (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2608  auto retMap =
2609  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2610  M->getComm()));
2611  if (verbose) {
2612  std::ostringstream os;
2613  os << *prefix << "Done" << endl;
2614  cerr << os.str ();
2615  }
2616  return retMap;
2617  }
2618  }
2619  else if (M->isContiguous ()) {
2620  if (verbose) {
2621  std::ostringstream os;
2622  os << *prefix << "Input is distributed & contiguous" << endl;
2623  cerr << os.str ();
2624  }
2625  // Contiguous, distributed Maps are one-to-one by construction.
2626  // (Locally replicated Maps can be contiguous.)
2627  return M;
2628  }
2629  else {
2630  if (verbose) {
2631  std::ostringstream os;
2632  os << *prefix << "Input is distributed & noncontiguous" << endl;
2633  cerr << os.str ();
2634  }
2636  const size_t numMyElems = M->getLocalNumElements ();
2637  ArrayView<const GO> myElems = M->getLocalElementList ();
2638  Array<int> owner_procs_vec (numMyElems);
2639 
2640  if (verbose) {
2641  std::ostringstream os;
2642  os << *prefix << "Call Directory::getDirectoryEntries: ";
2643  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2644  os << endl;
2645  cerr << os.str();
2646  }
2647  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2648  if (verbose) {
2649  std::ostringstream os;
2650  os << *prefix << "getDirectoryEntries result: ";
2651  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2652  os << endl;
2653  cerr << os.str();
2654  }
2655 
2656  Array<GO> myOwned_vec (numMyElems);
2657  size_t numMyOwnedElems = 0;
2658  for (size_t i = 0; i < numMyElems; ++i) {
2659  const GO GID = myElems[i];
2660  const int owner = owner_procs_vec[i];
2661 
2662  if (myRank == owner) {
2663  myOwned_vec[numMyOwnedElems++] = GID;
2664  }
2665  }
2666  myOwned_vec.resize (numMyOwnedElems);
2667 
2668  if (verbose) {
2669  std::ostringstream os;
2670  os << *prefix << "Create Map: ";
2671  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2672  os << endl;
2673  cerr << os.str();
2674  }
2675  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2676  M->getIndexBase(), M->getComm()));
2677  if (verbose) {
2678  std::ostringstream os;
2679  os << *prefix << "Done" << endl;
2680  cerr << os.str();
2681  }
2682  return retMap;
2683  }
2684 }
2685 
2686 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2687 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2690 {
2691  using Details::Behavior;
2693  using Teuchos::Array;
2694  using Teuchos::ArrayView;
2695  using Teuchos::RCP;
2696  using Teuchos::rcp;
2697  using Teuchos::toString;
2698  using std::cerr;
2699  using std::endl;
2700  using LO = LocalOrdinal;
2701  using GO = GlobalOrdinal;
2702  using map_type = Tpetra::Map<LO, GO, Node>;
2703 
2704  const bool verbose = Behavior::verbose("Map");
2705  std::unique_ptr<std::string> prefix;
2706  if (verbose) {
2707  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2708  prefix = Details::createPrefix(
2709  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2710  std::ostringstream os;
2711  os << *prefix << "Start" << endl;
2712  cerr << os.str();
2713  }
2714  const size_t maxNumToPrint = verbose ?
2715  Behavior::verbosePrintCountThreshold() : size_t(0);
2716 
2717  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2718  // Maps (which are 1-to-1 by construction).
2719 
2721  if (verbose) {
2722  std::ostringstream os;
2723  os << *prefix << "Initialize Directory" << endl;
2724  cerr << os.str ();
2725  }
2726  directory.initialize (*M, tie_break);
2727  if (verbose) {
2728  std::ostringstream os;
2729  os << *prefix << "Done initializing Directory" << endl;
2730  cerr << os.str ();
2731  }
2732  size_t numMyElems = M->getLocalNumElements ();
2733  ArrayView<const GO> myElems = M->getLocalElementList ();
2734  Array<int> owner_procs_vec (numMyElems);
2735  if (verbose) {
2736  std::ostringstream os;
2737  os << *prefix << "Call Directory::getDirectoryEntries: ";
2738  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2739  os << endl;
2740  cerr << os.str();
2741  }
2742  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2743  if (verbose) {
2744  std::ostringstream os;
2745  os << *prefix << "getDirectoryEntries result: ";
2746  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2747  os << endl;
2748  cerr << os.str();
2749  }
2750 
2751  const int myRank = M->getComm()->getRank();
2752  Array<GO> myOwned_vec (numMyElems);
2753  size_t numMyOwnedElems = 0;
2754  for (size_t i = 0; i < numMyElems; ++i) {
2755  const GO GID = myElems[i];
2756  const int owner = owner_procs_vec[i];
2757  if (myRank == owner) {
2758  myOwned_vec[numMyOwnedElems++] = GID;
2759  }
2760  }
2761  myOwned_vec.resize (numMyOwnedElems);
2762 
2763  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2764  // valid for the new Map. Why can't we reuse it?
2765  const global_size_t GINV =
2766  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2767  if (verbose) {
2768  std::ostringstream os;
2769  os << *prefix << "Create Map: ";
2770  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2771  os << endl;
2772  cerr << os.str();
2773  }
2774  RCP<const map_type> retMap
2775  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2776  M->getComm ()));
2777  if (verbose) {
2778  std::ostringstream os;
2779  os << *prefix << "Done" << endl;
2780  cerr << os.str();
2781  }
2782  return retMap;
2783 }
2784 
2785 //
2786 // Explicit instantiation macro
2787 //
2788 // Must be expanded from within the Tpetra namespace!
2789 //
2790 
2792 
2793 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2794  \
2795  template class Map< LO , GO , NODE >; \
2796  \
2797  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2798  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2799  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2800  \
2801  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2802  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2803  const size_t localNumElements, \
2804  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2805  \
2806  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2807  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2808  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2809  \
2810  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2811  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2812  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2813  \
2814  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2815  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2816  \
2817  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2818  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2819  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2820 
2821 
2823 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2824  template Teuchos::RCP< const Map<LO,GO> > \
2825  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2826  \
2827  template Teuchos::RCP< const Map<LO,GO> > \
2828  createContigMap<LO,GO>( global_size_t, size_t, \
2829  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2830  \
2831  template Teuchos::RCP< const Map<LO,GO> > \
2832  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2833  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2834  \
2835  template Teuchos::RCP< const Map<LO,GO> > \
2836  createUniformContigMap<LO,GO>( const global_size_t, \
2837  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2838 
2839 #endif // TPETRA_MAP_DEF_HPP
Interface for breaking ties in ownership.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:34
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
LocalOrdinal local_ordinal_type
The type of local indices.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
GlobalOrdinal global_ordinal_type
The type of global indices.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
Declaration of Tpetra::Details::printOnce.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declaration of a function that prints strings from each process.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map&#39;s device.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map&#39;s communicator with a subset communicator.
Functions for initializing and finalizing Tpetra.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
static bool verbose()
Whether Tpetra is in verbose mode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
typename device_type::execution_space execution_space
The Kokkos execution space.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
A parallel distribution of indices over processes.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
bool isOneToOne() const
Whether the Map is one to one.
void initialize(const map_type &map)
Initialize the Directory with its Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
typename Node::device_type device_type
This class&#39; Kokkos::Device specialization.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
LocalGlobal
Enum for local versus global allocation of Map entries.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
Node node_type
Legacy typedef that will go away at some point.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.
std::string description() const
Implementation of Teuchos::Describable.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.
Map()
Default constructor (that does nothing).