Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
14 
15 #ifndef TPETRA_MAP_DEF_HPP
16 #define TPETRA_MAP_DEF_HPP
17 
18 #include <memory>
19 #include <sstream>
20 #include <stdexcept>
21 #include <typeinfo>
22 
23 #include "Teuchos_as.hpp"
24 #include "Teuchos_TypeNameTraits.hpp"
25 #include "Teuchos_CommHelpers.hpp"
26 
27 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29 #include "Tpetra_Details_FixedHashTable.hpp"
32 #include "Tpetra_Core.hpp"
33 #include "Tpetra_Util.hpp"
34 #include "Tpetra_Details_mpiIsInitialized.hpp"
35 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38 
39 namespace { // (anonymous)
40 
41 void checkMapInputArray(const char ctorName[],
42  const void* indexList,
43  const size_t indexListSize,
44  const Teuchos::Comm<int>* const comm) {
46 
47  const bool debug = Behavior::debug("Map");
48  if (debug) {
49  using std::endl;
50  using Teuchos::outArg;
51  using Teuchos::REDUCE_MIN;
52  using Teuchos::reduceAll;
53 
54  const int myRank = comm == nullptr ? 0 : comm->getRank();
55  const bool verbose = Behavior::verbose("Map");
56  std::ostringstream lclErrStrm;
57  int lclSuccess = 1;
58 
59  if (indexListSize != 0 && indexList == nullptr) {
60  lclSuccess = 0;
61  if (verbose) {
62  lclErrStrm << "Proc " << myRank << ": indexList is null, "
63  "but indexListSize="
64  << indexListSize << " != 0." << endl;
65  }
66  }
67  int gblSuccess = 0; // output argument
68  reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
69  if (gblSuccess != 1) {
70  std::ostringstream gblErrStrm;
71  gblErrStrm << "Tpetra::Map constructor " << ctorName << " detected a problem with the input array "
72  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
73  "of global indices."
74  << endl;
75  if (verbose) {
76  using ::Tpetra::Details::gathervPrint;
77  gathervPrint(gblErrStrm, lclErrStrm.str(), *comm);
78  }
79  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, gblErrStrm.str());
80  }
81  }
82 }
83 
84 template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
85 void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal& minMyGID, GlobalOrdinal& maxMyGID, GlobalOrdinal& firstContiguousGID, GlobalOrdinal& lastContiguousGID_val, LocalOrdinal& lastContiguousGID_loc) {
86  using LO = LocalOrdinal;
87  using GO = GlobalOrdinal;
88  using exec_space = typename ViewType::device_type::execution_space;
89  using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO>>;
90  const LO numLocalElements = entryList.extent(0);
91 
92  // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
93  // ride, rather than the other way around
94  typedef typename Kokkos::MinLoc<LO, GO>::value_type minloc_type;
95  minloc_type myMinLoc;
96 
97  // Find the initial sequence of parallel gids
98  // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
99  // We want the one *before* that guy.
100  Kokkos::parallel_reduce(
101  range_policy(0, numLocalElements), KOKKOS_LAMBDA(const LO& i, GO& l_myMin, GO& l_myMax, GO& l_firstCont, minloc_type& l_lastCont) {
102  GO entry_0 = entryList[0];
103  GO entry_i = entryList[i];
104 
105  // Easy stuff
106  l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
107  l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
108  l_firstCont = entry_0;
109 
110  if (entry_i - entry_0 != i && l_lastCont.val >= i) {
111  // We're non-contiguous, so the guy before us could be the last contiguous guy
112  l_lastCont.val = i - 1;
113  l_lastCont.loc = entryList[i - 1];
114  } else if (i == numLocalElements - 1 && i < l_lastCont.val) {
115  // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
116  l_lastCont.val = i;
117  l_lastCont.loc = entry_i;
118  }
119  },
120  Kokkos::Min<GO>(minMyGID), Kokkos::Max<GO>(maxMyGID), Kokkos::Min<GO>(firstContiguousGID), Kokkos::MinLoc<LO, GO>(myMinLoc));
121 
122  // This switch is intentional, since we're using MinLoc backwards
123  lastContiguousGID_val = myMinLoc.loc;
124  lastContiguousGID_loc = myMinLoc.val;
125 }
126 
127 } // namespace
128 
129 namespace Tpetra {
130 
131 template <class LocalOrdinal, class GlobalOrdinal, class Node>
134  : comm_(new Teuchos::SerialComm<int>())
135  , indexBase_(0)
136  , numGlobalElements_(0)
137  , numLocalElements_(0)
138  , minMyGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
139  , maxMyGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
140  , minAllGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
141  , maxAllGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
142  , firstContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
143  , lastContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
144  , uniform_(false)
145  , // trivially
146  contiguous_(false)
147  , distributed_(false)
148  , // no communicator yet
149  directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
152 }
153 
154 template <class LocalOrdinal, class GlobalOrdinal, class Node>
156  Map(const global_size_t numGlobalElements,
157  const global_ordinal_type indexBase,
158  const Teuchos::RCP<const Teuchos::Comm<int>>& comm,
159  const LocalGlobal lOrG)
160  : comm_(comm)
161  , uniform_(true)
162  , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
163  using std::endl;
164  using Teuchos::as;
165  using Teuchos::broadcast;
166  using Teuchos::outArg;
167  using Teuchos::REDUCE_MAX;
168  using Teuchos::REDUCE_MIN;
169  using Teuchos::reduceAll;
170  using Teuchos::typeName;
171  using GO = global_ordinal_type;
172  using GST = global_size_t;
173  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
174  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
175  const char exPfx[] =
176  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
177 
178  const bool debug = Details::Behavior::debug("Map");
179  const bool verbose = Details::Behavior::verbose("Map");
180  std::unique_ptr<std::string> prefix;
181  if (verbose) {
182  prefix = Details::createPrefix(
183  comm_.getRawPtr(), "Map", funcName);
184  std::ostringstream os;
185  os << *prefix << "Start" << endl;
186  std::cerr << os.str();
187  }
190 
191  // In debug mode only, check whether numGlobalElements and
192  // indexBase are the same over all processes in the communicator.
193  if (debug) {
194  GST proc0NumGlobalElements = numGlobalElements;
195  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
196  GST minNumGlobalElements = numGlobalElements;
197  GST maxNumGlobalElements = numGlobalElements;
198  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
199  outArg(minNumGlobalElements));
200  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
201  outArg(maxNumGlobalElements));
202  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalElements != maxNumGlobalElements ||
203  numGlobalElements != minNumGlobalElements,
204  std::invalid_argument, exPfx << "All processes must "
205  "provide the same number of global elements. Process 0 set "
206  "numGlobalElements="
207  << proc0NumGlobalElements << ". The "
208  "calling process "
209  << comm->getRank() << " set "
210  "numGlobalElements="
211  << numGlobalElements << ". The min "
212  "and max values over all processes are "
213  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
214 
215  GO proc0IndexBase = indexBase;
216  broadcast<int, GO>(*comm_, 0, outArg(proc0IndexBase));
217  GO minIndexBase = indexBase;
218  GO maxIndexBase = indexBase;
219  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
220  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
221  TEUCHOS_TEST_FOR_EXCEPTION(minIndexBase != maxIndexBase || indexBase != minIndexBase,
222  std::invalid_argument, exPfx << "All processes must "
223  "provide the same indexBase argument. Process 0 set "
224  "indexBase="
225  << proc0IndexBase << ". The calling process " << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all processes are " << minIndexBase << " resp. " << maxIndexBase << ".");
226  }
227 
228  // Distribute the elements across the processes in the given
229  // communicator so that global IDs (GIDs) are
230  //
231  // - Nonoverlapping (only one process owns each GID)
232  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
233  // adjacent GIDs differ by more than one)
234  // - As evenly distributed as possible (the numbers of GIDs on two
235  // different processes do not differ by more than one)
236 
237  // All processes have the same numGlobalElements, but we still
238  // need to check that it is valid. numGlobalElements must be
239  // positive and not the "invalid" value (GSTI).
240  //
241  // This comparison looks funny, but it avoids compiler warnings
242  // for comparing unsigned integers (numGlobalElements_in is a
243  // GST, which is unsigned) while still working if we
244  // later decide to make GST signed.
245  TEUCHOS_TEST_FOR_EXCEPTION(
246  (numGlobalElements < 1 && numGlobalElements != 0),
247  std::invalid_argument, exPfx << "numGlobalElements (= " << numGlobalElements << ") must be nonnegative.");
248 
249  TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements == GSTI, std::invalid_argument, exPfx << "You provided numGlobalElements = Teuchos::OrdinalTraits<"
250  "Tpetra::global_size_t>::invalid(). This version of the "
251  "constructor requires a valid value of numGlobalElements. "
252  "You probably mistook this constructor for the \"contiguous "
253  "nonuniform\" constructor, which can compute the global "
254  "number of elements for you if you set numGlobalElements to "
255  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
256 
257  size_t numLocalElements = 0; // will set below
258  if (lOrG == GloballyDistributed) {
259  // Compute numLocalElements:
260  //
261  // If numGlobalElements == numProcs * B + remainder,
262  // then Proc r gets B+1 elements if r < remainder,
263  // and B elements if r >= remainder.
264  //
265  // This strategy is valid for any value of numGlobalElements and
266  // numProcs, including the following border cases:
267  // - numProcs == 1
268  // - numLocalElements < numProcs
269  //
270  // In the former case, remainder == 0 && numGlobalElements ==
271  // numLocalElements. In the latter case, remainder ==
272  // numGlobalElements && numLocalElements is either 0 or 1.
273  const GST numProcs = static_cast<GST>(comm_->getSize());
274  const GST myRank = static_cast<GST>(comm_->getRank());
275  const GST quotient = numGlobalElements / numProcs;
276  const GST remainder = numGlobalElements - quotient * numProcs;
277 
278  GO startIndex;
279  if (myRank < remainder) {
280  numLocalElements = static_cast<size_t>(1) + static_cast<size_t>(quotient);
281  // myRank was originally an int, so it should never overflow
282  // reasonable GO types.
283  startIndex = as<GO>(myRank) * as<GO>(numLocalElements);
284  } else {
285  numLocalElements = as<size_t>(quotient);
286  startIndex = as<GO>(myRank) * as<GO>(numLocalElements) +
287  as<GO>(remainder);
288  }
289 
290  minMyGID_ = indexBase + startIndex;
291  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
292  minAllGID_ = indexBase;
293  maxAllGID_ = indexBase + numGlobalElements - 1;
294  distributed_ = (numProcs > 1);
295  } else { // lOrG == LocallyReplicated
296  numLocalElements = as<size_t>(numGlobalElements);
297  minMyGID_ = indexBase;
298  maxMyGID_ = indexBase + numGlobalElements - 1;
299  distributed_ = false;
300  }
301 
302  minAllGID_ = indexBase;
303  maxAllGID_ = indexBase + numGlobalElements - 1;
304  indexBase_ = indexBase;
305  numGlobalElements_ = numGlobalElements;
306  numLocalElements_ = numLocalElements;
307  firstContiguousGID_ = minMyGID_;
308  lastContiguousGID_ = maxMyGID_;
309  contiguous_ = true;
310 
311  // Create the Directory on demand in getRemoteIndexList().
312  // setupDirectory ();
313 
314  if (verbose) {
315  std::ostringstream os;
316  os << *prefix << "Done" << endl;
317  std::cerr << os.str();
318  }
319 }
320 
321 template <class LocalOrdinal, class GlobalOrdinal, class Node>
323  Map(const global_size_t numGlobalElements,
324  const size_t numLocalElements,
325  const global_ordinal_type indexBase,
326  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
327  : comm_(comm)
328  , uniform_(false)
329  , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
330  using std::endl;
331  using Teuchos::as;
332  using Teuchos::broadcast;
333  using Teuchos::outArg;
334  using Teuchos::REDUCE_MAX;
335  using Teuchos::REDUCE_MIN;
336  using Teuchos::REDUCE_SUM;
337  using Teuchos::reduceAll;
338  using Teuchos::scan;
339  using GO = global_ordinal_type;
340  using GST = global_size_t;
341  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
342  const char funcName[] =
343  "Map(gblNumInds,lclNumInds,indexBase,comm)";
344  const char exPfx[] =
345  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
346  const char suffix[] =
347  ". Please report this bug to the Tpetra developers.";
348 
349  const bool debug = Details::Behavior::debug("Map");
350  const bool verbose = Details::Behavior::verbose("Map");
351  std::unique_ptr<std::string> prefix;
352  if (verbose) {
353  prefix = Details::createPrefix(
354  comm_.getRawPtr(), "Map", funcName);
355  std::ostringstream os;
356  os << *prefix << "Start" << endl;
357  std::cerr << os.str();
358  }
361 
362  // Global sum of numLocalElements over all processes.
363  // Keep this for later debug checks.
364  GST debugGlobalSum{};
365  if (debug) {
366  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
367  numGlobalElements, numLocalElements, indexBase, comm);
368  }
369 
370  // Distribute the elements across the nodes so that they are
371  // - non-overlapping
372  // - contiguous
373 
374  // This differs from the first Map constructor (that only takes a
375  // global number of elements) in that the user has specified the
376  // number of local elements, so that the elements are not
377  // (necessarily) evenly distributed over the processes.
378 
379  // Compute my local offset. This is an inclusive scan, so to get
380  // the final offset, we subtract off the input.
381  GO scanResult = 0;
382  scan<int, GO>(*comm, REDUCE_SUM, numLocalElements, outArg(scanResult));
383  const GO myOffset = scanResult - numLocalElements;
384 
385  if (numGlobalElements != GSTI) {
386  numGlobalElements_ = numGlobalElements; // Use the user's value.
387  } else {
388  // Inclusive scan means that the last process has the final sum.
389  // Rather than doing a reduceAll to get the sum of
390  // numLocalElements, we can just have the last process broadcast
391  // its result. That saves us a round of log(numProcs) messages.
392  const int numProcs = comm->getSize();
393  GST globalSum = scanResult;
394  if (numProcs > 1) {
395  broadcast(*comm, numProcs - 1, outArg(globalSum));
396  }
397  numGlobalElements_ = globalSum;
398 
399  if (debug) {
400  // No need for an all-reduce here; both come from collectives.
401  TEUCHOS_TEST_FOR_EXCEPTION(globalSum != debugGlobalSum, std::logic_error, exPfx << "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum << suffix);
402  }
403  }
404  numLocalElements_ = numLocalElements;
405  indexBase_ = indexBase;
406  minAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase;
407  maxAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + GO(numGlobalElements_) - GO(1);
408  minMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase + GO(myOffset);
409  maxMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + myOffset + GO(numLocalElements) - GO(1);
410  firstContiguousGID_ = minMyGID_;
411  lastContiguousGID_ = maxMyGID_;
412  contiguous_ = true;
413  distributed_ = checkIsDist();
414 
415  // Create the Directory on demand in getRemoteIndexList().
416  // setupDirectory ();
417 
418  if (verbose) {
419  std::ostringstream os;
420  os << *prefix << "Done" << endl;
421  std::cerr << os.str();
422  }
423 }
424 
425 template <class LocalOrdinal, class GlobalOrdinal, class Node>
429  const char errorMessagePrefix[],
430  const global_size_t numGlobalElements,
431  const size_t numLocalElements,
432  const global_ordinal_type indexBase,
433  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const {
434  const bool debug = Details::Behavior::debug("Map");
435  if (!debug) {
436  return global_size_t(0);
437  }
438 
439  using Teuchos::broadcast;
440  using Teuchos::outArg;
441  using Teuchos::ptr;
442  using Teuchos::REDUCE_MAX;
443  using Teuchos::REDUCE_MIN;
444  using Teuchos::REDUCE_SUM;
445  using Teuchos::reduceAll;
446  using GO = global_ordinal_type;
447  using GST = global_size_t;
448  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
449 
450  // The user has specified the distribution of indices over the
451  // processes. The distribution is not necessarily contiguous or
452  // equally shared over the processes.
453  //
454  // We assume that the number of local elements can be stored in a
455  // size_t. The instance member numLocalElements_ is a size_t, so
456  // this variable and that should have the same type.
457 
458  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
459  reduceAll<int, GST>(*comm, REDUCE_SUM, static_cast<GST>(numLocalElements),
460  outArg(debugGlobalSum));
461  // In debug mode only, check whether numGlobalElements and
462  // indexBase are the same over all processes in the communicator.
463  {
464  GST proc0NumGlobalElements = numGlobalElements;
465  broadcast<int, GST>(*comm_, 0, outArg(proc0NumGlobalElements));
466  GST minNumGlobalElements = numGlobalElements;
467  GST maxNumGlobalElements = numGlobalElements;
468  reduceAll<int, GST>(*comm, REDUCE_MIN, numGlobalElements,
469  outArg(minNumGlobalElements));
470  reduceAll<int, GST>(*comm, REDUCE_MAX, numGlobalElements,
471  outArg(maxNumGlobalElements));
472  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalElements != maxNumGlobalElements ||
473  numGlobalElements != minNumGlobalElements,
474  std::invalid_argument, errorMessagePrefix << "All processes "
475  "must provide the same number of global elements, even if "
476  "that argument is "
477  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
478  "(which signals that the Map should compute the global "
479  "number of elements). Process 0 set numGlobalElements"
480  "="
481  << proc0NumGlobalElements << ". The calling process " << comm->getRank() << " set numGlobalElements=" << numGlobalElements << ". The min and max values over all "
482  "processes are "
483  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
484 
485  GO proc0IndexBase = indexBase;
486  broadcast<int, GO>(*comm_, 0, outArg(proc0IndexBase));
487  GO minIndexBase = indexBase;
488  GO maxIndexBase = indexBase;
489  reduceAll<int, GO>(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
490  reduceAll<int, GO>(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
491  TEUCHOS_TEST_FOR_EXCEPTION(minIndexBase != maxIndexBase || indexBase != minIndexBase,
492  std::invalid_argument, errorMessagePrefix << "All processes must provide the same indexBase argument. "
493  "Process 0 set indexBase = "
494  << proc0IndexBase << ". The "
495  "calling process "
496  << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all "
497  "processes are "
498  << minIndexBase << " resp. " << maxIndexBase << ".");
499 
500  // Make sure that the sum of numLocalElements over all processes
501  // equals numGlobalElements.
502  TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements != GSTI &&
503  debugGlobalSum != numGlobalElements,
504  std::invalid_argument,
505  errorMessagePrefix << "The sum of each process' number of "
506  "indices over all processes, "
507  << debugGlobalSum << ", != "
508  << "numGlobalElements=" << numGlobalElements << ". If you "
509  "would like this constructor to compute numGlobalElements "
510  "for you, you may set numGlobalElements="
511  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
512  "on input. Please note that this is NOT necessarily -1.");
513  }
514  return debugGlobalSum;
515 }
516 
517 template <class LocalOrdinal, class GlobalOrdinal, class Node>
518 void Map<LocalOrdinal, GlobalOrdinal, Node>::
519  initWithNonownedHostIndexList(
520  const char errorMessagePrefix[],
521  const global_size_t numGlobalElements,
522  const Kokkos::View<const global_ordinal_type*,
523  Kokkos::LayoutLeft,
524  Kokkos::HostSpace,
525  Kokkos::MemoryUnmanaged>& entryList_host,
526  const global_ordinal_type indexBase,
527  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
528  Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
529 
530  using Kokkos::LayoutLeft;
531  using Kokkos::subview;
532  using Kokkos::View;
533  using Kokkos::view_alloc;
534  using Kokkos::WithoutInitializing;
535  using Teuchos::as;
536  using Teuchos::broadcast;
537  using Teuchos::outArg;
538  using Teuchos::ptr;
539  using Teuchos::REDUCE_MAX;
540  using Teuchos::REDUCE_MIN;
541  using Teuchos::REDUCE_SUM;
542  using Teuchos::reduceAll;
543  using LO = local_ordinal_type;
544  using GO = global_ordinal_type;
545  using GST = global_size_t;
546  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
547  // Make sure that Kokkos has been initialized (Github Issue #513).
548  TEUCHOS_TEST_FOR_EXCEPTION(!Kokkos::is_initialized(), std::runtime_error,
549  errorMessagePrefix << "The Kokkos execution space "
550  << Teuchos::TypeNameTraits<execution_space>::name()
551  << " has not been initialized. "
552  "Please initialize it before creating a Map.")
553 
554  // The user has specified the distribution of indices over the
555  // processes, via the input array of global indices on each
556  // process. The distribution is not necessarily contiguous or
557  // equally shared over the processes.
558 
559  // The length of the input array on this process is the number of
560  // local indices to associate with this process, even though the
561  // input array contains global indices. We assume that the number
562  // of local indices on a process can be stored in a size_t;
563  // numLocalElements_ is a size_t, so this variable and that should
564  // have the same type.
565  const size_t numLocalElements(entryList_host.size());
566 
567  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
568  numLocalElements, indexBase, comm);
569 
570  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
571  // reduction is redundant, since the directory Map will have to do
572  // the same thing. Thus, we could do the scan and broadcast for
573  // the directory Map here, and give the computed offsets to the
574  // directory Map's constructor. However, a reduction costs less
575  // than a scan and broadcast, so this still saves time if users of
576  // this Map don't ever need the Directory (i.e., if they never
577  // call getRemoteIndexList on this Map).
578  if (numGlobalElements != GSTI) {
579  numGlobalElements_ = numGlobalElements; // Use the user's value.
580  } else { // The user wants us to compute the sum.
581  reduceAll(*comm, REDUCE_SUM,
582  static_cast<GST>(numLocalElements),
583  outArg(numGlobalElements_));
584  }
585 
586  // mfh 20 Feb 2013: We've never quite done the right thing for
587  // duplicate GIDs here. Duplicate GIDs have always been counted
588  // distinctly in numLocalElements_, and thus should get a
589  // different LID. However, we've always used std::map or a hash
590  // table for the GID -> LID lookup table, so distinct GIDs always
591  // map to the same LID. Furthermore, the order of the input GID
592  // list matters, so it's not desirable to sort for determining
593  // uniqueness.
594  //
595  // I've chosen for now to write this code as if the input GID list
596  // contains no duplicates. If this is not desired, we could use
597  // the lookup table itself to determine uniqueness: If we haven't
598  // seen the GID before, it gets a new LID and it's added to the
599  // LID -> GID and GID -> LID tables. If we have seen the GID
600  // before, it doesn't get added to either table. I would
601  // implement this, but it would cost more to do the double lookups
602  // in the table (one to check, and one to insert).
603  //
604  // More importantly, since we build the GID -> LID table in (a
605  // thread-) parallel (way), the order in which duplicate GIDs may
606  // get inserted is not defined. This would make the assignment of
607  // LID to GID nondeterministic.
608 
609  numLocalElements_ = numLocalElements;
610  indexBase_ = indexBase;
611 
612  minMyGID_ = indexBase_;
613  maxMyGID_ = indexBase_;
614 
615  // NOTE (mfh 27 May 2015): While finding the initial contiguous
616  // GID range requires looking at all the GIDs in the range,
617  // dismissing an interval of GIDs only requires looking at the
618  // first and last GIDs. Thus, we could do binary search backwards
619  // from the end in order to catch the common case of a contiguous
620  // interval followed by noncontiguous entries. On the other hand,
621  // we could just expose this case explicitly as yet another Map
622  // constructor, and avoid the trouble of detecting it.
623  if (numLocalElements_ > 0) {
624  // Find contiguous GID range, with the restriction that the
625  // beginning of the range starts with the first entry. While
626  // doing so, fill in the LID -> GID table.
627  typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
628  auto lgMap_host =
629  Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
630 
631  // The input array entryList_host is already on host, so we
632  // don't need to take a host view of it.
633  // auto entryList_host =
634  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
635  // Kokkos::deep_copy (entryList_host, entryList);
636 
637  firstContiguousGID_ = entryList_host[0];
638  lastContiguousGID_ = firstContiguousGID_ + 1;
639 
640  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
641  // anyway, so we have to look at them all. The logical way to
642  // find the first noncontiguous entry would thus be to "reduce,"
643  // where the local reduction result is whether entryList[i] + 1
644  // == entryList[i+1].
645 
646  lgMap_host[0] = firstContiguousGID_;
647  size_t i = 1;
648  for (; i < numLocalElements_; ++i) {
649  const GO curGid = entryList_host[i];
650  const LO curLid = as<LO>(i);
651 
652  if (lastContiguousGID_ != curGid) break;
653 
654  // Add the entry to the LID->GID table only after we know that
655  // the current GID is in the initial contiguous sequence, so
656  // that we don't repeat adding it in the first iteration of
657  // the loop below over the remaining noncontiguous GIDs.
658  lgMap_host[curLid] = curGid;
659  ++lastContiguousGID_;
660  }
661  --lastContiguousGID_;
662  // NOTE: i is the first non-contiguous index.
663 
664  // [firstContiguousGID_, lastContigousGID_] is the initial
665  // sequence of contiguous GIDs. We can start the min and max
666  // GID using this range.
667  minMyGID_ = firstContiguousGID_;
668  maxMyGID_ = lastContiguousGID_;
669 
670  // Compute the GID -> LID lookup table, _not_ including the
671  // initial sequence of contiguous GIDs.
672  LO firstNonContiguous_loc = i;
673  {
674  const std::pair<size_t, size_t> ncRange(i, entryList_host.extent(0));
675  auto nonContigGids_host = subview(entryList_host, ncRange);
676  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(nonContigGids_host.extent(0)) !=
677  static_cast<size_t>(entryList_host.extent(0) - i),
678  std::logic_error,
679  "Tpetra::Map noncontiguous constructor: "
680  "nonContigGids_host.extent(0) = "
681  << nonContigGids_host.extent(0)
682  << " != entryList_host.extent(0) - i = "
683  << (entryList_host.extent(0) - i) << " = "
684  << entryList_host.extent(0) << " - " << i
685  << ". Please report this bug to the Tpetra developers.");
686 
687  // FixedHashTable's constructor expects an owned device View,
688  // so we must deep-copy the subview of the input indices.
689  View<GO*, LayoutLeft, device_type>
690  nonContigGids(view_alloc("nonContigGids", WithoutInitializing),
691  nonContigGids_host.size());
692 
693  // DEEP_COPY REVIEW - HOST-TO-DEVICE
694  Kokkos::deep_copy(execution_space(), nonContigGids, nonContigGids_host);
695  Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
696 
697  glMap_ = global_to_local_table_type(nonContigGids,
698  firstNonContiguous_loc);
699  // Make host version - when memory spaces match these just do trivial assignment
700  glMapHost_ = global_to_local_table_host_type(glMap_);
701  }
702 
703  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
704  // table above, we have to look at all the (noncontiguous) input
705  // indices anyway. Thus, why not have the constructor compute
706  // and return the min and max?
707 
708  for (; i < numLocalElements_; ++i) {
709  const GO curGid = entryList_host[i];
710  const LO curLid = static_cast<LO>(i);
711  lgMap_host[curLid] = curGid; // LID -> GID table
712 
713  // While iterating through entryList, we compute its
714  // (process-local) min and max elements.
715  if (curGid < minMyGID_) {
716  minMyGID_ = curGid;
717  }
718  if (curGid > maxMyGID_) {
719  maxMyGID_ = curGid;
720  }
721  }
722 
723  // We filled lgMap on host above; now sync back to device.
724  // DEEP_COPY REVIEW - HOST-TO-DEVICE
725  Kokkos::deep_copy(execution_space(), lgMap, lgMap_host);
726 
727  // "Commit" the local-to-global lookup table we filled in above.
728  lgMap_ = lgMap;
729  // We've already created this, so use it.
730  lgMapHost_ = lgMap_host;
731 
732  } else {
733  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
734  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
735  // This insures tests for GIDs in the range
736  // [firstContiguousGID_, lastContiguousGID_] fail for processes
737  // with no local elements.
738  firstContiguousGID_ = indexBase_ + 1;
739  lastContiguousGID_ = indexBase_;
740  // glMap_ was default constructed, so it's already empty.
741  }
742 
743  // Compute the min and max of all processes' GIDs. If
744  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
745  // are both indexBase_. This is wrong, but fixing it would
746  // require either a fancy sparse all-reduce, or a custom reduction
747  // operator that ignores invalid values ("invalid" means
748  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
749  //
750  // Also, while we're at it, use the same all-reduce to figure out
751  // if the Map is distributed. "Distributed" means that there is
752  // at least one process with a number of local elements less than
753  // the number of global elements.
754  //
755  // We're computing the min and max of all processes' GIDs using a
756  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
757  // and y are signed). (This lets us combine the min and max into
758  // a single all-reduce.) If each process sets localDist=1 if its
759  // number of local elements is strictly less than the number of
760  // global elements, and localDist=0 otherwise, then a MAX
761  // all-reduce on localDist tells us if the Map is distributed (1
762  // if yes, 0 if no). Thus, we can append localDist onto the end
763  // of the data and get the global result from the all-reduce.
764  if (std::numeric_limits<GO>::is_signed) {
765  // Does my process NOT own all the elements?
766  const GO localDist =
767  (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
768 
769  GO minMaxInput[3];
770  minMaxInput[0] = -minMyGID_;
771  minMaxInput[1] = maxMyGID_;
772  minMaxInput[2] = localDist;
773 
774  GO minMaxOutput[3];
775  minMaxOutput[0] = 0;
776  minMaxOutput[1] = 0;
777  minMaxOutput[2] = 0;
778  reduceAll<int, GO>(*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
779  minAllGID_ = -minMaxOutput[0];
780  maxAllGID_ = minMaxOutput[1];
781  const GO globalDist = minMaxOutput[2];
782  distributed_ = (comm_->getSize() > 1 && globalDist == 1);
783  } else { // unsigned; use two reductions
784  // This is always correct, no matter the signedness of GO.
785  reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
786  reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
787  distributed_ = checkIsDist();
788  }
789 
790  contiguous_ = false; // "Contiguous" is conservative.
791 
792  TEUCHOS_TEST_FOR_EXCEPTION(
793  minAllGID_ < indexBase_,
794  std::invalid_argument,
795  "Tpetra::Map constructor (noncontiguous): "
796  "Minimum global ID = "
797  << minAllGID_ << " over all process(es) is "
798  "less than the given indexBase = "
799  << indexBase_ << ".");
800 
801  // Create the Directory on demand in getRemoteIndexList().
802  // setupDirectory ();
803 }
804 
805 template <class LocalOrdinal, class GlobalOrdinal, class Node>
807  Map(const global_size_t numGlobalElements,
808  const GlobalOrdinal indexList[],
809  const LocalOrdinal indexListSize,
810  const GlobalOrdinal indexBase,
811  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
812  : comm_(comm)
813  , uniform_(false)
814  , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
815  using std::endl;
816  const char funcName[] =
817  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
818 
819  const bool verbose = Details::Behavior::verbose("Map");
820  std::unique_ptr<std::string> prefix;
821  if (verbose) {
822  prefix = Details::createPrefix(
823  comm_.getRawPtr(), "Map", funcName);
824  std::ostringstream os;
825  os << *prefix << "Start" << endl;
826  std::cerr << os.str();
827  }
831  checkMapInputArray("(GST, const GO[], LO, GO, comm)",
832  indexList, static_cast<size_t>(indexListSize),
833  comm.getRawPtr());
834  // Not quite sure if I trust all code to behave correctly if the
835  // pointer is nonnull but the array length is nonzero, so I'll
836  // make sure the raw pointer is null if the length is zero.
837  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
838  Kokkos::View<const GlobalOrdinal*,
839  Kokkos::LayoutLeft,
840  Kokkos::HostSpace,
841  Kokkos::MemoryUnmanaged>
842  inds(indsRaw, indexListSize);
843  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
844  indexBase, comm);
845  if (verbose) {
846  std::ostringstream os;
847  os << *prefix << "Done" << endl;
848  std::cerr << os.str();
849  }
850 }
851 
852 template <class LocalOrdinal, class GlobalOrdinal, class Node>
854  Map(const global_size_t numGlobalElements,
855  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
856  const GlobalOrdinal indexBase,
857  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
858  : comm_(comm)
859  , uniform_(false)
860  , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
861  using std::endl;
862  const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
863 
864  const bool verbose = Details::Behavior::verbose("Map");
865  std::unique_ptr<std::string> prefix;
866  if (verbose) {
867  prefix = Details::createPrefix(
868  comm_.getRawPtr(), "Map", funcName);
869  std::ostringstream os;
870  os << *prefix << "Start" << endl;
871  std::cerr << os.str();
872  }
876  const size_t numLclInds = static_cast<size_t>(entryList.size());
877  checkMapInputArray("(GST, ArrayView, GO, comm)",
878  entryList.getRawPtr(), numLclInds,
879  comm.getRawPtr());
880  // Not quite sure if I trust both ArrayView and View to behave
881  // correctly if the pointer is nonnull but the array length is
882  // nonzero, so I'll make sure it's null if the length is zero.
883  const GlobalOrdinal* const indsRaw =
884  numLclInds == 0 ? NULL : entryList.getRawPtr();
885  Kokkos::View<const GlobalOrdinal*,
886  Kokkos::LayoutLeft,
887  Kokkos::HostSpace,
888  Kokkos::MemoryUnmanaged>
889  inds(indsRaw, numLclInds);
890  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
891  indexBase, comm);
892  if (verbose) {
893  std::ostringstream os;
894  os << *prefix << "Done" << endl;
895  std::cerr << os.str();
896  }
897 }
898 
899 template <class LocalOrdinal, class GlobalOrdinal, class Node>
901  Map(const global_size_t numGlobalElements,
902  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
903  const GlobalOrdinal indexBase,
904  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
905  : comm_(comm)
906  , uniform_(false)
907  , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
908  using Kokkos::LayoutLeft;
909  using Kokkos::subview;
910  using Kokkos::View;
911  using Kokkos::view_alloc;
912  using Kokkos::WithoutInitializing;
913  using std::endl;
914  using Teuchos::arcp;
915  using Teuchos::ArrayView;
916  using Teuchos::as;
917  using Teuchos::broadcast;
918  using Teuchos::outArg;
919  using Teuchos::ptr;
920  using Teuchos::REDUCE_MAX;
921  using Teuchos::REDUCE_MIN;
922  using Teuchos::REDUCE_SUM;
923  using Teuchos::reduceAll;
924  using Teuchos::typeName;
925  using LO = local_ordinal_type;
926  using GO = global_ordinal_type;
927  using GST = global_size_t;
928  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
929  const char funcName[] =
930  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
931 
932  const bool verbose = Details::Behavior::verbose("Map");
933  std::unique_ptr<std::string> prefix;
934  if (verbose) {
935  prefix = Details::createPrefix(
936  comm_.getRawPtr(), "Map", funcName);
937  std::ostringstream os;
938  os << *prefix << "Start" << endl;
939  std::cerr << os.str();
940  }
944  checkMapInputArray("(GST, Kokkos::View, GO, comm)",
945  entryList.data(),
946  static_cast<size_t>(entryList.extent(0)),
947  comm.getRawPtr());
948 
949  // The user has specified the distribution of indices over the
950  // processes, via the input array of global indices on each
951  // process. The distribution is not necessarily contiguous or
952  // equally shared over the processes.
953 
954  // The length of the input array on this process is the number of
955  // local indices to associate with this process, even though the
956  // input array contains global indices. We assume that the number
957  // of local indices on a process can be stored in a size_t;
958  // numLocalElements_ is a size_t, so this variable and that should
959  // have the same type.
960  const size_t numLocalElements(entryList.size());
961 
962  initialNonuniformDebugCheck(funcName, numGlobalElements,
963  numLocalElements, indexBase, comm);
964 
965  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
966  // reduction is redundant, since the directory Map will have to do
967  // the same thing. Thus, we could do the scan and broadcast for
968  // the directory Map here, and give the computed offsets to the
969  // directory Map's constructor. However, a reduction costs less
970  // than a scan and broadcast, so this still saves time if users of
971  // this Map don't ever need the Directory (i.e., if they never
972  // call getRemoteIndexList on this Map).
973  if (numGlobalElements != GSTI) {
974  numGlobalElements_ = numGlobalElements; // Use the user's value.
975  } else { // The user wants us to compute the sum.
976  reduceAll(*comm, REDUCE_SUM,
977  static_cast<GST>(numLocalElements),
978  outArg(numGlobalElements_));
979  }
980 
981  // mfh 20 Feb 2013: We've never quite done the right thing for
982  // duplicate GIDs here. Duplicate GIDs have always been counted
983  // distinctly in numLocalElements_, and thus should get a
984  // different LID. However, we've always used std::map or a hash
985  // table for the GID -> LID lookup table, so distinct GIDs always
986  // map to the same LID. Furthermore, the order of the input GID
987  // list matters, so it's not desirable to sort for determining
988  // uniqueness.
989  //
990  // I've chosen for now to write this code as if the input GID list
991  // contains no duplicates. If this is not desired, we could use
992  // the lookup table itself to determine uniqueness: If we haven't
993  // seen the GID before, it gets a new LID and it's added to the
994  // LID -> GID and GID -> LID tables. If we have seen the GID
995  // before, it doesn't get added to either table. I would
996  // implement this, but it would cost more to do the double lookups
997  // in the table (one to check, and one to insert).
998  //
999  // More importantly, since we build the GID -> LID table in (a
1000  // thread-) parallel (way), the order in which duplicate GIDs may
1001  // get inserted is not defined. This would make the assignment of
1002  // LID to GID nondeterministic.
1003 
1004  numLocalElements_ = numLocalElements;
1005  indexBase_ = indexBase;
1006 
1007  minMyGID_ = indexBase_;
1008  maxMyGID_ = indexBase_;
1009 
1010  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1011  // GID range requires looking at all the GIDs in the range,
1012  // dismissing an interval of GIDs only requires looking at the
1013  // first and last GIDs. Thus, we could do binary search backwards
1014  // from the end in order to catch the common case of a contiguous
1015  // interval followed by noncontiguous entries. On the other hand,
1016  // we could just expose this case explicitly as yet another Map
1017  // constructor, and avoid the trouble of detecting it.
1018  if (numLocalElements_ > 0) {
1019  // Find contiguous GID range, with the restriction that the
1020  // beginning of the range starts with the first entry. While
1021  // doing so, fill in the LID -> GID table.
1022  typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
1023 
1024  // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1025  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1026  Kokkos::deep_copy(typename device_type::execution_space(), lgMap, entryList);
1027  LO lastContiguousGID_loc;
1028  computeConstantsOnDevice(entryList, minMyGID_, maxMyGID_, firstContiguousGID_, lastContiguousGID_, lastContiguousGID_loc);
1029  LO firstNonContiguous_loc = lastContiguousGID_loc + 1;
1030  auto nonContigGids = Kokkos::subview(entryList, std::pair<size_t, size_t>(firstNonContiguous_loc, entryList.extent(0)));
1031 
1032  // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1033  glMap_ = global_to_local_table_type(nonContigGids,
1034  firstNonContiguous_loc);
1035 
1036  // "Commit" the local-to-global lookup table we filled in above.
1037  lgMap_ = lgMap;
1038 
1039  } else {
1040  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1041  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1042  // This insures tests for GIDs in the range
1043  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1044  // with no local elements.
1045  firstContiguousGID_ = indexBase_ + 1;
1046  lastContiguousGID_ = indexBase_;
1047  // glMap_ was default constructed, so it's already empty.
1048  }
1049 
1050  // Compute the min and max of all processes' GIDs. If
1051  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1052  // are both indexBase_. This is wrong, but fixing it would
1053  // require either a fancy sparse all-reduce, or a custom reduction
1054  // operator that ignores invalid values ("invalid" means
1055  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1056  //
1057  // Also, while we're at it, use the same all-reduce to figure out
1058  // if the Map is distributed. "Distributed" means that there is
1059  // at least one process with a number of local elements less than
1060  // the number of global elements.
1061  //
1062  // We're computing the min and max of all processes' GIDs using a
1063  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1064  // and y are signed). (This lets us combine the min and max into
1065  // a single all-reduce.) If each process sets localDist=1 if its
1066  // number of local elements is strictly less than the number of
1067  // global elements, and localDist=0 otherwise, then a MAX
1068  // all-reduce on localDist tells us if the Map is distributed (1
1069  // if yes, 0 if no). Thus, we can append localDist onto the end
1070  // of the data and get the global result from the all-reduce.
1071  if (std::numeric_limits<GO>::is_signed) {
1072  // Does my process NOT own all the elements?
1073  const GO localDist =
1074  (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
1075 
1076  GO minMaxInput[3];
1077  minMaxInput[0] = -minMyGID_;
1078  minMaxInput[1] = maxMyGID_;
1079  minMaxInput[2] = localDist;
1080 
1081  GO minMaxOutput[3];
1082  minMaxOutput[0] = 0;
1083  minMaxOutput[1] = 0;
1084  minMaxOutput[2] = 0;
1085  reduceAll<int, GO>(*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1086  minAllGID_ = -minMaxOutput[0];
1087  maxAllGID_ = minMaxOutput[1];
1088  const GO globalDist = minMaxOutput[2];
1089  distributed_ = (comm_->getSize() > 1 && globalDist == 1);
1090  } else { // unsigned; use two reductions
1091  // This is always correct, no matter the signedness of GO.
1092  reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
1093  reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
1094  distributed_ = checkIsDist();
1095  }
1096 
1097  contiguous_ = false; // "Contiguous" is conservative.
1098 
1099  TEUCHOS_TEST_FOR_EXCEPTION(
1100  minAllGID_ < indexBase_,
1101  std::invalid_argument,
1102  "Tpetra::Map constructor (noncontiguous): "
1103  "Minimum global ID = "
1104  << minAllGID_ << " over all process(es) is "
1105  "less than the given indexBase = "
1106  << indexBase_ << ".");
1107 
1108  // Create the Directory on demand in getRemoteIndexList().
1109  // setupDirectory ();
1110 
1111  if (verbose) {
1112  std::ostringstream os;
1113  os << *prefix << "Done" << endl;
1114  std::cerr << os.str();
1115  }
1116 }
1117 
1118 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1120  if (!Kokkos::is_initialized()) {
1121  std::ostringstream os;
1122  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1123  "Kokkos::finalize() has been called. This is user error! There are "
1124  "two likely causes: "
1125  << std::endl
1126  << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1127  << std::endl
1128  << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1129  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1130  "or Tpetra::finalize()."
1131  << std::endl
1132  << std::endl
1133  << "Don't do either of these! Please refer to GitHib Issue #2372."
1134  << std::endl;
1135  ::Tpetra::Details::printOnce(std::cerr, os.str(),
1136  this->getComm().getRawPtr());
1137  } else {
1138  using ::Tpetra::Details::mpiIsFinalized;
1139  using ::Tpetra::Details::mpiIsInitialized;
1140  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1141 
1142  Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
1143  if (!comm.is_null() && teuchosCommIsAnMpiComm(*comm) &&
1144  mpiIsInitialized() && mpiIsFinalized()) {
1145  // Tpetra itself does not require MPI, even if building with
1146  // MPI. It is legal to create Tpetra objects that do not use
1147  // MPI, even in an MPI program. However, calling Tpetra stuff
1148  // after MPI_Finalize() has been called is a bad idea, since
1149  // some Tpetra defaults may use MPI if available.
1150  std::ostringstream os;
1151  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1152  "MPI_Finalize() has been called. This is user error! There are "
1153  "two likely causes: "
1154  << std::endl
1155  << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1156  << std::endl
1157  << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1158  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1159  "Tpetra::finalize()."
1160  << std::endl
1161  << std::endl
1162  << "Don't do either of these! Please refer to GitHib Issue #2372."
1163  << std::endl;
1164  ::Tpetra::Details::printOnce(std::cerr, os.str(), comm.getRawPtr());
1165  }
1166  }
1167  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1168  // because Tpetra does not yet require Tpetra::initialize /
1169  // Tpetra::finalize.
1170 }
1171 
1172 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1174  TEUCHOS_TEST_FOR_EXCEPTION(
1175  getComm().is_null(), std::logic_error,
1176  "Tpetra::Map::isOneToOne: "
1177  "getComm() returns null. Please report this bug to the Tpetra "
1178  "developers.");
1179 
1180  // This is a collective operation, if it hasn't been called before.
1181  setupDirectory();
1182  return directory_->isOneToOne(*this);
1183 }
1184 
1185 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1186 LocalOrdinal
1188  getLocalElement(GlobalOrdinal globalIndex) const {
1189  if (isContiguous()) {
1190  if (globalIndex < getMinGlobalIndex() ||
1191  globalIndex > getMaxGlobalIndex()) {
1192  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1193  }
1194  return static_cast<LocalOrdinal>(globalIndex - getMinGlobalIndex());
1195  } else if (globalIndex >= firstContiguousGID_ &&
1196  globalIndex <= lastContiguousGID_) {
1197  return static_cast<LocalOrdinal>(globalIndex - firstContiguousGID_);
1198  } else {
1199  // If the given global index is not in the table, this returns
1200  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1201  // glMapHost_ is Host and does not assume UVM
1202  lazyPushToHost();
1203  return glMapHost_.get(globalIndex);
1204  }
1205 }
1206 
1207 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1208 GlobalOrdinal
1210  getGlobalElement(LocalOrdinal localIndex) const {
1211  if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1212  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid();
1213  }
1214  if (isContiguous()) {
1215  return getMinGlobalIndex() + localIndex;
1216  } else {
1217  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1218  // involvement. As a result, it is thread safe.
1219  //
1220  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1221  lazyPushToHost();
1222  return lgMapHost_[localIndex];
1223  }
1224 }
1225 
1226 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1228  isNodeLocalElement(LocalOrdinal localIndex) const {
1229  if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1230  return false;
1231  } else {
1232  return true;
1233  }
1234 }
1235 
1236 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1238  isNodeGlobalElement(GlobalOrdinal globalIndex) const {
1239  return this->getLocalElement(globalIndex) !=
1240  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1241 }
1242 
1243 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1245  return uniform_;
1246 }
1247 
1248 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1250  return contiguous_;
1251 }
1252 
1253 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1256  getLocalMap() const {
1257  return local_map_type(glMap_, lgMap_, getIndexBase(),
1258  getMinGlobalIndex(), getMaxGlobalIndex(),
1259  firstContiguousGID_, lastContiguousGID_,
1260  getLocalNumElements(), isContiguous());
1261 }
1262 
1263 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1266  using Teuchos::outArg;
1267  using Teuchos::REDUCE_MIN;
1268  using Teuchos::reduceAll;
1269  //
1270  // Tests that avoid the Boolean all-reduce below by using
1271  // globally consistent quantities.
1272  //
1273  if (this == &map) {
1274  // Pointer equality on one process always implies pointer
1275  // equality on all processes, since Map is immutable.
1276  return true;
1277  } else if (getComm()->getSize() != map.getComm()->getSize()) {
1278  // The two communicators have different numbers of processes.
1279  // It's not correct to call isCompatible() in that case. This
1280  // may result in the all-reduce hanging below.
1281  return false;
1282  } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1283  // Two Maps are definitely NOT compatible if they have different
1284  // global numbers of indices.
1285  return false;
1286  } else if (isContiguous() && isUniform() &&
1287  map.isContiguous() && map.isUniform()) {
1288  // Contiguous uniform Maps with the same number of processes in
1289  // their communicators, and with the same global numbers of
1290  // indices, are always compatible.
1291  return true;
1292  } else if (!isContiguous() && !map.isContiguous() &&
1293  lgMap_.extent(0) != 0 && map.lgMap_.extent(0) != 0 &&
1294  lgMap_.data() == map.lgMap_.data()) {
1295  // Noncontiguous Maps whose global index lists are nonempty and
1296  // have the same pointer must be the same (and therefore
1297  // contiguous).
1298  //
1299  // Nonempty is important. For example, consider a communicator
1300  // with two processes, and two Maps that share this
1301  // communicator, with zero global indices on the first process,
1302  // and different nonzero numbers of global indices on the second
1303  // process. In that case, on the first process, the pointers
1304  // would both be NULL.
1305  return true;
1306  }
1307 
1308  TEUCHOS_TEST_FOR_EXCEPTION(
1309  getGlobalNumElements() != map.getGlobalNumElements(), std::logic_error,
1310  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1311  "checked that this condition is true above, but it's false here. "
1312  "Please report this bug to the Tpetra developers.");
1313 
1314  // Do both Maps have the same number of indices on each process?
1315  const int locallyCompat =
1316  (getLocalNumElements() == map.getLocalNumElements()) ? 1 : 0;
1317 
1318  int globallyCompat = 0;
1319  reduceAll<int, int>(*comm_, REDUCE_MIN, locallyCompat, outArg(globallyCompat));
1320  return (globallyCompat == 1);
1321 }
1322 
1323 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1326  using Teuchos::ArrayView;
1327  using GO = global_ordinal_type;
1328  using size_type = typename ArrayView<const GO>::size_type;
1329 
1330  // If both Maps are contiguous, we can compare their GID ranges
1331  // easily by looking at the min and max GID on this process.
1332  // Otherwise, we'll compare their GID lists. If only one Map is
1333  // contiguous, then we only have to call getLocalElementList() on
1334  // the noncontiguous Map. (It's best to avoid calling it on a
1335  // contiguous Map, since it results in unnecessary storage that
1336  // persists for the lifetime of the Map.)
1337 
1338  if (this == &map) {
1339  // Pointer equality on one process always implies pointer
1340  // equality on all processes, since Map is immutable.
1341  return true;
1342  } else if (getLocalNumElements() != map.getLocalNumElements()) {
1343  return false;
1344  } else if (getMinGlobalIndex() != map.getMinGlobalIndex() ||
1345  getMaxGlobalIndex() != map.getMaxGlobalIndex()) {
1346  return false;
1347  } else {
1348  if (isContiguous()) {
1349  if (map.isContiguous()) {
1350  return true; // min and max match, so the ranges match.
1351  } else { // *this is contiguous, but map is not contiguous
1352  TEUCHOS_TEST_FOR_EXCEPTION(
1353  !this->isContiguous() || map.isContiguous(), std::logic_error,
1354  "Tpetra::Map::locallySameAs: BUG");
1355  ArrayView<const GO> rhsElts = map.getLocalElementList();
1356  const GO minLhsGid = this->getMinGlobalIndex();
1357  const size_type numRhsElts = rhsElts.size();
1358  for (size_type k = 0; k < numRhsElts; ++k) {
1359  const GO curLhsGid = minLhsGid + static_cast<GO>(k);
1360  if (curLhsGid != rhsElts[k]) {
1361  return false; // stop on first mismatch
1362  }
1363  }
1364  return true;
1365  }
1366  } else if (map.isContiguous()) { // *this is not contiguous, but map is
1367  TEUCHOS_TEST_FOR_EXCEPTION(
1368  this->isContiguous() || !map.isContiguous(), std::logic_error,
1369  "Tpetra::Map::locallySameAs: BUG");
1370  ArrayView<const GO> lhsElts = this->getLocalElementList();
1371  const GO minRhsGid = map.getMinGlobalIndex();
1372  const size_type numLhsElts = lhsElts.size();
1373  for (size_type k = 0; k < numLhsElts; ++k) {
1374  const GO curRhsGid = minRhsGid + static_cast<GO>(k);
1375  if (curRhsGid != lhsElts[k]) {
1376  return false; // stop on first mismatch
1377  }
1378  }
1379  return true;
1380  } else if (this->lgMap_.data() == map.lgMap_.data()) {
1381  // Pointers to LID->GID "map" (actually just an array) are the
1382  // same, and the number of GIDs are the same.
1383  return this->getLocalNumElements() == map.getLocalNumElements();
1384  } else { // we actually have to compare the GIDs
1385  if (this->getLocalNumElements() != map.getLocalNumElements()) {
1386  return false; // We already checked above, but check just in case
1387  } else {
1388  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename node_type::execution_space>;
1389 
1390  auto lhsLclMap = getLocalMap();
1391  auto rhsLclMap = map.getLocalMap();
1392 
1393  LocalOrdinal numMismatchedElements = 0;
1394  Kokkos::parallel_reduce(
1395  "Tpetra::Map::locallySameAs",
1396  range_type(0, this->getLocalNumElements()),
1397  KOKKOS_LAMBDA(const LocalOrdinal lid, LocalOrdinal& numMismatches) {
1398  if (lhsLclMap.getGlobalElement(lid) != rhsLclMap.getGlobalElement(lid))
1399  ++numMismatches;
1400  },
1401  numMismatchedElements);
1402 
1403  return (numMismatchedElements == 0);
1404  }
1405  }
1406  }
1407 }
1408 
1409 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1412  if (this == &map)
1413  return true;
1414 
1415  // We are going to check if lmap1 is fitted into lmap2:
1416  // Is lmap1 (map) a subset of lmap2 (this)?
1417  // And do the first lmap1.getLocalNumElements() global elements
1418  // of lmap1,lmap2 owned on each process exactly match?
1419  auto lmap1 = map.getLocalMap();
1420  auto lmap2 = this->getLocalMap();
1421 
1422  auto numLocalElements1 = lmap1.getLocalNumElements();
1423  auto numLocalElements2 = lmap2.getLocalNumElements();
1424 
1425  if (numLocalElements1 > numLocalElements2) {
1426  // There are more indices in the first map on this process than in second map.
1427  return false;
1428  }
1429 
1430  if (lmap1.isContiguous() && lmap2.isContiguous()) {
1431  // When both Maps are contiguous, just check the interval inclusion.
1432  return ((lmap1.getMinGlobalIndex() == lmap2.getMinGlobalIndex()) &&
1433  (lmap1.getMaxGlobalIndex() <= lmap2.getMaxGlobalIndex()));
1434  }
1435 
1436  if (lmap1.getMinGlobalIndex() < lmap2.getMinGlobalIndex() ||
1437  lmap1.getMaxGlobalIndex() > lmap2.getMaxGlobalIndex()) {
1438  // The second map does not include the first map bounds, and thus some of
1439  // the first map global indices are not in the second map.
1440  return false;
1441  }
1442 
1443  using LO = local_ordinal_type;
1444  using range_type =
1445  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1446 
1447  // Check all elements.
1448  LO numDiff = 0;
1449  Kokkos::parallel_reduce(
1450  "isLocallyFitted",
1451  range_type(0, numLocalElements1),
1452  KOKKOS_LAMBDA(const LO i, LO& diff) {
1453  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1454  },
1455  numDiff);
1456 
1457  return (numDiff == 0);
1458 }
1459 
1460 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1463  using Teuchos::outArg;
1464  using Teuchos::REDUCE_MIN;
1465  using Teuchos::reduceAll;
1466  //
1467  // Tests that avoid the Boolean all-reduce below by using
1468  // globally consistent quantities.
1469  //
1470  if (this == &map) {
1471  // Pointer equality on one process always implies pointer
1472  // equality on all processes, since Map is immutable.
1473  return true;
1474  } else if (getComm()->getSize() != map.getComm()->getSize()) {
1475  // The two communicators have different numbers of processes.
1476  // It's not correct to call isSameAs() in that case. This
1477  // may result in the all-reduce hanging below.
1478  return false;
1479  } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1480  // Two Maps are definitely NOT the same if they have different
1481  // global numbers of indices.
1482  return false;
1483  } else if (getMinAllGlobalIndex() != map.getMinAllGlobalIndex() ||
1484  getMaxAllGlobalIndex() != map.getMaxAllGlobalIndex() ||
1485  getIndexBase() != map.getIndexBase()) {
1486  // If the global min or max global index doesn't match, or if
1487  // the index base doesn't match, then the Maps aren't the same.
1488  return false;
1489  } else if (isDistributed() != map.isDistributed()) {
1490  // One Map is distributed and the other is not, which means that
1491  // the Maps aren't the same.
1492  return false;
1493  } else if (isContiguous() && isUniform() &&
1494  map.isContiguous() && map.isUniform()) {
1495  // Contiguous uniform Maps with the same number of processes in
1496  // their communicators, with the same global numbers of indices,
1497  // and with matching index bases and ranges, must be the same.
1498  return true;
1499  }
1500 
1501  // The two communicators must have the same number of processes,
1502  // with process ranks occurring in the same order. This uses
1503  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1504  // "Operations that access communicators are local and their
1505  // execution does not require interprocess communication."
1506  // However, just to be sure, I'll put this call after the above
1507  // tests that don't communicate.
1508  if (!::Tpetra::Details::congruent(*comm_, *(map.getComm()))) {
1509  return false;
1510  }
1511 
1512  // If we get this far, we need to check local properties and then
1513  // communicate local sameness across all processes.
1514  const int isSame_lcl = locallySameAs(map) ? 1 : 0;
1515 
1516  // Return true if and only if all processes report local sameness.
1517  int isSame_gbl = 0;
1518  reduceAll<int, int>(*comm_, REDUCE_MIN, isSame_lcl, outArg(isSame_gbl));
1519  return isSame_gbl == 1;
1520 }
1521 
1522 namespace { // (anonymous)
1523 template <class LO, class GO, class DT>
1524 class FillLgMap {
1525  public:
1526  FillLgMap(const Kokkos::View<GO*, DT>& lgMap,
1527  const GO startGid)
1528  : lgMap_(lgMap)
1529  , startGid_(startGid) {
1530  Kokkos::RangePolicy<LO, typename DT::execution_space>
1531  range(static_cast<LO>(0), static_cast<LO>(lgMap.size()));
1532  Kokkos::parallel_for(range, *this);
1533  }
1534 
1535  KOKKOS_INLINE_FUNCTION void operator()(const LO& lid) const {
1536  lgMap_(lid) = startGid_ + static_cast<GO>(lid);
1537  }
1538 
1539  private:
1540  const Kokkos::View<GO*, DT> lgMap_;
1541  const GO startGid_;
1542 };
1543 
1544 } // namespace
1545 
1546 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1547 typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_type
1549  using std::endl;
1550  using LO = local_ordinal_type;
1551  using GO = global_ordinal_type;
1552  using const_lg_view_type = decltype(lgMap_);
1553  using lg_view_type = typename const_lg_view_type::non_const_type;
1554  const bool debug = Details::Behavior::debug("Map");
1555  const bool verbose = Details::Behavior::verbose("Map");
1556 
1557  std::unique_ptr<std::string> prefix;
1558  if (verbose) {
1559  prefix = Details::createPrefix(
1560  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1561  std::ostringstream os;
1562  os << *prefix << "Start" << endl;
1563  std::cerr << os.str();
1564  }
1565 
1566  // If the local-to-global mapping doesn't exist yet, and if we
1567  // have local entries, then create and fill the local-to-global
1568  // mapping.
1569  const bool needToCreateLocalToGlobalMapping =
1570  lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1571 
1572  if (needToCreateLocalToGlobalMapping) {
1573  if (verbose) {
1574  std::ostringstream os;
1575  os << *prefix << "Need to create lgMap" << endl;
1576  std::cerr << os.str();
1577  }
1578  if (debug) {
1579  // The local-to-global mapping should have been set up already
1580  // for a noncontiguous map.
1581  TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1582  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1583  "mapping (lgMap_) should have been set up already for a "
1584  "noncontiguous Map. Please report this bug to the Tpetra "
1585  "developers.");
1586  }
1587  const LO numElts = static_cast<LO>(getLocalNumElements());
1588 
1589  using Kokkos::view_alloc;
1590  using Kokkos::WithoutInitializing;
1591  lg_view_type lgMap("lgMap", numElts);
1592  if (verbose) {
1593  std::ostringstream os;
1594  os << *prefix << "Fill lgMap" << endl;
1595  std::cerr << os.str();
1596  }
1597  FillLgMap<LO, GO, device_type> fillIt(lgMap, minMyGID_);
1598 
1599  if (verbose) {
1600  std::ostringstream os;
1601  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1602  std::cerr << os.str();
1603  }
1604 
1605  auto lgMapHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
1606  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1607  auto exec_instance = execution_space();
1608  Kokkos::deep_copy(exec_instance, lgMapHost, lgMap);
1609 
1610  // There's a non-trivial chance we'll grab this on the host,
1611  // so let's make sure the copy finishes
1612  exec_instance.fence();
1613 
1614  // "Commit" the local-to-global lookup table we filled in above.
1615  lgMap_ = lgMap;
1616  lgMapHost_ = lgMapHost;
1617  } else {
1618  lazyPushToHost();
1619  }
1620 
1621  if (verbose) {
1622  std::ostringstream os;
1623  os << *prefix << "Done" << endl;
1624  std::cerr << os.str();
1625  }
1626  return lgMapHost_;
1627 }
1628 
1629 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1630 typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_device_type
1632  using std::endl;
1633  using LO = local_ordinal_type;
1634  using GO = global_ordinal_type;
1635  using const_lg_view_type = decltype(lgMap_);
1636  using lg_view_type = typename const_lg_view_type::non_const_type;
1637  const bool debug = Details::Behavior::debug("Map");
1638  const bool verbose = Details::Behavior::verbose("Map");
1639 
1640  std::unique_ptr<std::string> prefix;
1641  if (verbose) {
1642  prefix = Details::createPrefix(
1643  comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1644  std::ostringstream os;
1645  os << *prefix << "Start" << endl;
1646  std::cerr << os.str();
1647  }
1648 
1649  // If the local-to-global mapping doesn't exist yet, and if we
1650  // have local entries, then create and fill the local-to-global
1651  // mapping.
1652  const bool needToCreateLocalToGlobalMapping =
1653  lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1654 
1655  if (needToCreateLocalToGlobalMapping) {
1656  if (verbose) {
1657  std::ostringstream os;
1658  os << *prefix << "Need to create lgMap" << endl;
1659  std::cerr << os.str();
1660  }
1661  if (debug) {
1662  // The local-to-global mapping should have been set up already
1663  // for a noncontiguous map.
1664  TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1665  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1666  "mapping (lgMap_) should have been set up already for a "
1667  "noncontiguous Map. Please report this bug to the Tpetra "
1668  "developers.");
1669  }
1670  const LO numElts = static_cast<LO>(getLocalNumElements());
1671 
1672  using Kokkos::view_alloc;
1673  using Kokkos::WithoutInitializing;
1674  lg_view_type lgMap("lgMap", numElts);
1675  if (verbose) {
1676  std::ostringstream os;
1677  os << *prefix << "Fill lgMap" << endl;
1678  std::cerr << os.str();
1679  }
1680  FillLgMap<LO, GO, device_type> fillIt(lgMap, minMyGID_);
1681 
1682  // "Commit" the local-to-global lookup table we filled in above.
1683  lgMap_ = lgMap;
1684  }
1685 
1686  if (verbose) {
1687  std::ostringstream os;
1688  os << *prefix << "Done" << endl;
1689  std::cerr << os.str();
1690  }
1691  return lgMap_;
1692 }
1693 
1694 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1695 Teuchos::ArrayView<const GlobalOrdinal>
1697  using GO = global_ordinal_type;
1698 
1699  // If the local-to-global mapping doesn't exist yet, and if we
1700  // have local entries, then create and fill the local-to-global
1701  // mapping.
1702  (void)this->getMyGlobalIndices();
1703 
1704  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1705  lazyPushToHost();
1706  const GO* lgMapHostRawPtr = lgMapHost_.data();
1707  // The third argument forces ArrayView not to try to track memory
1708  // in a debug build. We have to use it because the memory does
1709  // not belong to a Teuchos memory management class.
1710  return Teuchos::ArrayView<const GO>(
1711  lgMapHostRawPtr,
1712  lgMapHost_.extent(0),
1713  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1714 }
1715 
1716 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1718  return distributed_;
1719 }
1720 
1721 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1723  using Teuchos::TypeNameTraits;
1724  std::ostringstream os;
1725 
1726  os << "Tpetra::Map: {"
1727  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name()
1728  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name()
1729  << ", NodeType: " << TypeNameTraits<Node>::name();
1730  if (this->getObjectLabel() != "") {
1731  os << ", Label: \"" << this->getObjectLabel() << "\"";
1732  }
1733  os << ", Global number of entries: " << getGlobalNumElements()
1734  << ", Number of processes: " << getComm()->getSize()
1735  << ", Uniform: " << (isUniform() ? "true" : "false")
1736  << ", Contiguous: " << (isContiguous() ? "true" : "false")
1737  << ", Distributed: " << (isDistributed() ? "true" : "false")
1738  << "}";
1739  return os.str();
1740 }
1741 
1746 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1747 std::string
1749  localDescribeToString(const Teuchos::EVerbosityLevel vl) const {
1750  using LO = local_ordinal_type;
1751  using std::endl;
1752 
1753  // This preserves current behavior of Map.
1754  if (vl < Teuchos::VERB_HIGH) {
1755  return std::string();
1756  }
1757  auto outStringP = Teuchos::rcp(new std::ostringstream());
1758  Teuchos::RCP<Teuchos::FancyOStream> outp =
1759  Teuchos::getFancyOStream(outStringP);
1760  Teuchos::FancyOStream& out = *outp;
1761 
1762  auto comm = this->getComm();
1763  const int myRank = comm->getRank();
1764  const int numProcs = comm->getSize();
1765  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1766  Teuchos::OSTab tab1(out);
1767 
1768  const LO numEnt = static_cast<LO>(this->getLocalNumElements());
1769  out << "My number of entries: " << numEnt << endl
1770  << "My minimum global index: " << this->getMinGlobalIndex() << endl
1771  << "My maximum global index: " << this->getMaxGlobalIndex() << endl;
1772 
1773  if (vl == Teuchos::VERB_EXTREME) {
1774  out << "My global indices: [";
1775  const LO minLclInd = this->getMinLocalIndex();
1776  for (LO k = 0; k < numEnt; ++k) {
1777  out << minLclInd + this->getGlobalElement(k);
1778  if (k + 1 < numEnt) {
1779  out << ", ";
1780  }
1781  }
1782  out << "]" << endl;
1783  }
1784 
1785  out.flush(); // make sure the ostringstream got everything
1786  return outStringP->str();
1787 }
1788 
1789 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1791  describe(Teuchos::FancyOStream& out,
1792  const Teuchos::EVerbosityLevel verbLevel) const {
1793  using std::endl;
1794  using Teuchos::TypeNameTraits;
1795  using Teuchos::VERB_DEFAULT;
1796  using Teuchos::VERB_HIGH;
1797  using Teuchos::VERB_LOW;
1798  using Teuchos::VERB_NONE;
1799  using LO = local_ordinal_type;
1800  using GO = global_ordinal_type;
1801  const Teuchos::EVerbosityLevel vl =
1802  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1803 
1804  if (vl == VERB_NONE) {
1805  return; // don't print anything
1806  }
1807  // If this Map's Comm is null, then the Map does not participate
1808  // in collective operations with the other processes. In that
1809  // case, it is not even legal to call this method. The reasonable
1810  // thing to do in that case is nothing.
1811  auto comm = this->getComm();
1812  if (comm.is_null()) {
1813  return;
1814  }
1815  const int myRank = comm->getRank();
1816  const int numProcs = comm->getSize();
1817 
1818  // Only Process 0 should touch the output stream, but this method
1819  // in general may need to do communication. Thus, we may need to
1820  // preserve the current tab level across multiple "if (myRank ==
1821  // 0) { ... }" inner scopes. This is why we sometimes create
1822  // OSTab instances by pointer, instead of by value. We only need
1823  // to create them by pointer if the tab level must persist through
1824  // multiple inner scopes.
1825  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1826 
1827  if (myRank == 0) {
1828  // At every verbosity level but VERB_NONE, Process 0 prints.
1829  // By convention, describe() always begins with a tab before
1830  // printing.
1831  tab0 = Teuchos::rcp(new Teuchos::OSTab(out));
1832  out << "\"Tpetra::Map\":" << endl;
1833  tab1 = Teuchos::rcp(new Teuchos::OSTab(out));
1834  {
1835  out << "Template parameters:" << endl;
1836  Teuchos::OSTab tab2(out);
1837  out << "LocalOrdinal: " << TypeNameTraits<LO>::name() << endl
1838  << "GlobalOrdinal: " << TypeNameTraits<GO>::name() << endl
1839  << "Node: " << TypeNameTraits<Node>::name() << endl;
1840  }
1841  const std::string label = this->getObjectLabel();
1842  if (label != "") {
1843  out << "Label: \"" << label << "\"" << endl;
1844  }
1845  out << "Global number of entries: " << getGlobalNumElements() << endl
1846  << "Minimum global index: " << getMinAllGlobalIndex() << endl
1847  << "Maximum global index: " << getMaxAllGlobalIndex() << endl
1848  << "Index base: " << getIndexBase() << endl
1849  << "Number of processes: " << numProcs << endl
1850  << "Uniform: " << (isUniform() ? "true" : "false") << endl
1851  << "Contiguous: " << (isContiguous() ? "true" : "false") << endl
1852  << "Distributed: " << (isDistributed() ? "true" : "false") << endl;
1853  }
1854 
1855  // This is collective over the Map's communicator.
1856  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1857  const std::string lclStr = this->localDescribeToString(vl);
1858  Tpetra::Details::gathervPrint(out, lclStr, *comm);
1859  }
1860 }
1861 
1862 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1863 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1865  replaceCommWithSubset(const Teuchos::RCP<const Teuchos::Comm<int>>& newComm) const {
1866  using Teuchos::RCP;
1867  using Teuchos::rcp;
1868  using GST = global_size_t;
1869  using LO = local_ordinal_type;
1870  using GO = global_ordinal_type;
1871  using map_type = Map<LO, GO, Node>;
1872 
1873  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1874  // the Map by calling its ordinary public constructor, using the
1875  // original Map's data. This only involves O(1) all-reduces over
1876  // the new communicator, which in the common case only includes a
1877  // small number of processes.
1878 
1879  // Create the Map to return.
1880  if (newComm.is_null() || newComm->getSize() < 1) {
1881  return Teuchos::null; // my process does not participate in the new Map
1882  } else if (newComm->getSize() == 1) {
1883  lazyPushToHost();
1884 
1885  // The case where the new communicator has only one process is
1886  // easy. We don't have to communicate to get all the
1887  // information we need. Use the default comm to create the new
1888  // Map, then fill in all the fields directly.
1889  RCP<map_type> newMap(new map_type());
1890 
1891  newMap->comm_ = newComm;
1892  // mfh 07 Oct 2016: Preserve original behavior, even though the
1893  // original index base may no longer be the globally min global
1894  // index. See #616 for why this doesn't matter so much anymore.
1895  newMap->indexBase_ = this->indexBase_;
1896  newMap->numGlobalElements_ = this->numLocalElements_;
1897  newMap->numLocalElements_ = this->numLocalElements_;
1898  newMap->minMyGID_ = this->minMyGID_;
1899  newMap->maxMyGID_ = this->maxMyGID_;
1900  newMap->minAllGID_ = this->minMyGID_;
1901  newMap->maxAllGID_ = this->maxMyGID_;
1902  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1903  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1904  // Since the new communicator has only one process, neither
1905  // uniformity nor contiguity have changed.
1906  newMap->uniform_ = this->uniform_;
1907  newMap->contiguous_ = this->contiguous_;
1908  // The new communicator only has one process, so the new Map is
1909  // not distributed.
1910  newMap->distributed_ = false;
1911  newMap->lgMap_ = this->lgMap_;
1912  newMap->lgMapHost_ = this->lgMapHost_;
1913  newMap->glMap_ = this->glMap_;
1914  newMap->glMapHost_ = this->glMapHost_;
1915  // It's OK not to initialize the new Map's Directory.
1916  // This is initialized lazily, on first call to getRemoteIndexList.
1917 
1918  return newMap;
1919  } else { // newComm->getSize() != 1
1920  // Even if the original Map is contiguous, the new Map might not
1921  // be, especially if the excluded processes have ranks != 0 or
1922  // newComm->getSize()-1. The common case for this method is to
1923  // exclude many (possibly even all but one) processes, so it
1924  // likely doesn't pay to do the global communication (over the
1925  // original communicator) to figure out whether we can optimize
1926  // the result Map. Thus, we just set up the result Map as
1927  // noncontiguous.
1928  //
1929  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1930  // the global-to-local table, etc. Optimize this code path to
1931  // avoid unnecessary local work.
1932 
1933  // Make Map (re)compute the global number of elements.
1934  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid();
1935  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1936  // to use the noncontiguous Map constructor, since the new Map
1937  // might not be contiguous. Even if the old Map was contiguous,
1938  // some process in the "middle" might have been excluded. If we
1939  // want to avoid local work, we either have to do the setup by
1940  // hand, or write a new Map constructor.
1941 #if 1
1942  // The disabled code here throws the following exception in
1943  // Map's replaceCommWithSubset test:
1944  //
1945  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
1946  // 10:
1947  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1948  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1949 
1950  auto lgMap = this->getMyGlobalIndices();
1951  using size_type =
1952  typename std::decay<decltype(lgMap.extent(0))>::type;
1953  const size_type lclNumInds =
1954  static_cast<size_type>(this->getLocalNumElements());
1955  using Teuchos::TypeNameTraits;
1956  TEUCHOS_TEST_FOR_EXCEPTION(lgMap.extent(0) != lclNumInds, std::logic_error,
1957  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1958  "has length "
1959  << lgMap.extent(0) << " (of type " << TypeNameTraits<size_type>::name() << ") != this->getLocalNumElements()"
1960  " = "
1961  << this->getLocalNumElements() << ". The latter, upon being "
1962  "cast to size_type = "
1963  << TypeNameTraits<size_type>::name() << ", "
1964  "becomes "
1965  << lclNumInds << ". Please report this bug to the Tpetra "
1966  "developers.");
1967 #else
1968  Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList();
1969 #endif // 1
1970 
1971  const GO indexBase = this->getIndexBase();
1972  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
1973  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
1974  return rcp(new map_type(RECOMPUTE, lgMap_device, indexBase, newComm));
1975  }
1976 }
1977 
1978 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1979 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1982  using Teuchos::Comm;
1983  using Teuchos::null;
1984  using Teuchos::outArg;
1985  using Teuchos::RCP;
1986  using Teuchos::rcp;
1987  using Teuchos::REDUCE_MIN;
1988  using Teuchos::reduceAll;
1989 
1990  // Create the new communicator. split() returns a valid
1991  // communicator on all processes. On processes where color == 0,
1992  // ignore the result. Passing key == 0 tells MPI to order the
1993  // processes in the new communicator by their rank in the old
1994  // communicator.
1995  const int color = (numLocalElements_ == 0) ? 0 : 1;
1996  // MPI_Comm_split must be called collectively over the original
1997  // communicator. We can't just call it on processes with color
1998  // one, even though we will ignore its result on processes with
1999  // color zero.
2000  RCP<const Comm<int>> newComm = comm_->split(color, 0);
2001  if (color == 0) {
2002  newComm = null;
2003  }
2004 
2005  // Create the Map to return.
2006  if (newComm.is_null()) {
2007  return null; // my process does not participate in the new Map
2008  } else {
2009  RCP<Map> map = rcp(new Map());
2010 
2011  map->comm_ = newComm;
2012  map->indexBase_ = indexBase_;
2013  map->numGlobalElements_ = numGlobalElements_;
2014  map->numLocalElements_ = numLocalElements_;
2015  map->minMyGID_ = minMyGID_;
2016  map->maxMyGID_ = maxMyGID_;
2017  map->minAllGID_ = minAllGID_;
2018  map->maxAllGID_ = maxAllGID_;
2019  map->firstContiguousGID_ = firstContiguousGID_;
2020  map->lastContiguousGID_ = lastContiguousGID_;
2021 
2022  // Uniformity and contiguity have not changed. The directory
2023  // has changed, but we've taken care of that above.
2024  map->uniform_ = uniform_;
2025  map->contiguous_ = contiguous_;
2026 
2027  // If the original Map was NOT distributed, then the new Map
2028  // cannot be distributed.
2029  //
2030  // If the number of processes in the new communicator is 1, then
2031  // the new Map is not distributed.
2032  //
2033  // Otherwise, we have to check the new Map using an all-reduce
2034  // (over the new communicator). For example, the original Map
2035  // may have had some processes with zero elements, and all other
2036  // processes with the same number of elements as in the whole
2037  // Map. That Map is technically distributed, because of the
2038  // processes with zero elements. Removing those processes would
2039  // make the new Map locally replicated.
2040  if (!distributed_ || newComm->getSize() == 1) {
2041  map->distributed_ = false;
2042  } else {
2043  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2044  int allProcsOwnAllGids = 0;
2045  reduceAll<int, int>(*newComm, REDUCE_MIN, iOwnAllGids, outArg(allProcsOwnAllGids));
2046  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2047  }
2048 
2049  map->lgMap_ = lgMap_;
2050  map->lgMapHost_ = lgMapHost_;
2051  map->glMap_ = glMap_;
2052  map->glMapHost_ = glMapHost_;
2053 
2054  // Map's default constructor creates an uninitialized Directory.
2055  // The Directory will be initialized on demand in
2056  // getRemoteIndexList().
2057  //
2058  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2059  // directory more efficiently than just recreating it. If
2060  // directory recreation proves a bottleneck, we can always
2061  // revisit this. On the other hand, Directory creation is only
2062  // collective over the new, presumably much smaller
2063  // communicator, so it may not be worth the effort to optimize.
2064 
2065  return map;
2066  }
2067 }
2068 
2069 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2071  TEUCHOS_TEST_FOR_EXCEPTION(
2072  directory_.is_null(), std::logic_error,
2073  "Tpetra::Map::setupDirectory: "
2074  "The Directory is null. "
2075  "Please report this bug to the Tpetra developers.");
2076 
2077  // Only create the Directory if it hasn't been created yet.
2078  // This is a collective operation.
2079  if (!directory_->initialized()) {
2080  directory_->initialize(*this);
2081  }
2082 }
2083 
2084 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2087  getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2088  const Teuchos::ArrayView<int>& PIDs,
2089  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const {
2091  using std::endl;
2092  using Tpetra::Details::OrdinalTraits;
2093  using size_type = Teuchos::ArrayView<int>::size_type;
2094 
2095  const bool verbose = Details::Behavior::verbose("Map");
2096  const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2097  std::unique_ptr<std::string> prefix;
2098  if (verbose) {
2099  prefix = Details::createPrefix(comm_.getRawPtr(),
2100  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2101  std::ostringstream os;
2102  os << *prefix << "Start: ";
2103  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2104  os << endl;
2105  std::cerr << os.str();
2106  }
2107 
2108  // Empty Maps (i.e., containing no indices on any processes in the
2109  // Map's communicator) are perfectly valid. In that case, if the
2110  // input GID list is nonempty, we fill the output arrays with
2111  // invalid values, and return IDNotPresent to notify the caller.
2112  // It's perfectly valid to give getRemoteIndexList GIDs that the
2113  // Map doesn't own. SubmapImport test 2 needs this functionality.
2114  if (getGlobalNumElements() == 0) {
2115  if (GIDs.size() == 0) {
2116  if (verbose) {
2117  std::ostringstream os;
2118  os << *prefix << "Done; both Map & input are empty" << endl;
2119  std::cerr << os.str();
2120  }
2121  return AllIDsPresent; // trivially
2122  } else {
2123  if (verbose) {
2124  std::ostringstream os;
2125  os << *prefix << "Done: Map is empty on all processes, "
2126  "so all output PIDs & LIDs are invalid (-1)."
2127  << endl;
2128  std::cerr << os.str();
2129  }
2130  for (size_type k = 0; k < PIDs.size(); ++k) {
2131  PIDs[k] = OrdinalTraits<int>::invalid();
2132  }
2133  for (size_type k = 0; k < LIDs.size(); ++k) {
2134  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid();
2135  }
2136  return IDNotPresent;
2137  }
2138  }
2139 
2140  // getRemoteIndexList must be called collectively, and Directory
2141  // initialization is collective too, so it's OK to initialize the
2142  // Directory on demand.
2143 
2144  if (verbose) {
2145  std::ostringstream os;
2146  os << *prefix << "Call setupDirectory" << endl;
2147  std::cerr << os.str();
2148  }
2149  setupDirectory();
2150  if (verbose) {
2151  std::ostringstream os;
2152  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2153  std::cerr << os.str();
2154  }
2155  const Tpetra::LookupStatus retVal =
2156  directory_->getDirectoryEntries(*this, GIDs, PIDs, LIDs);
2157  if (verbose) {
2158  std::ostringstream os;
2159  os << *prefix << "Done; getDirectoryEntries returned "
2160  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2161  << "; ";
2162  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2163  os << ", ";
2164  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2165  os << endl;
2166  std::cerr << os.str();
2167  }
2168  return retVal;
2169 }
2170 
2171 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2174  getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2175  const Teuchos::ArrayView<int>& PIDs) const {
2177  using std::endl;
2178 
2179  const bool verbose = Details::Behavior::verbose("Map");
2180  const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2181  std::unique_ptr<std::string> prefix;
2182  if (verbose) {
2183  prefix = Details::createPrefix(comm_.getRawPtr(),
2184  "Map", "getRemoteIndexList(GIDs,PIDs)");
2185  std::ostringstream os;
2186  os << *prefix << "Start: ";
2187  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2188  os << endl;
2189  std::cerr << os.str();
2190  }
2191 
2192  if (getGlobalNumElements() == 0) {
2193  if (GIDs.size() == 0) {
2194  if (verbose) {
2195  std::ostringstream os;
2196  os << *prefix << "Done; both Map & input are empty" << endl;
2197  std::cerr << os.str();
2198  }
2199  return AllIDsPresent; // trivially
2200  } else {
2201  if (verbose) {
2202  std::ostringstream os;
2203  os << *prefix << "Done: Map is empty on all processes, "
2204  "so all output PIDs are invalid (-1)."
2205  << endl;
2206  std::cerr << os.str();
2207  }
2208  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size(); ++k) {
2209  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid();
2210  }
2211  return IDNotPresent;
2212  }
2213  }
2214 
2215  // getRemoteIndexList must be called collectively, and Directory
2216  // initialization is collective too, so it's OK to initialize the
2217  // Directory on demand.
2218 
2219  if (verbose) {
2220  std::ostringstream os;
2221  os << *prefix << "Call setupDirectory" << endl;
2222  std::cerr << os.str();
2223  }
2224  setupDirectory();
2225  if (verbose) {
2226  std::ostringstream os;
2227  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2228  std::cerr << os.str();
2229  }
2230  const Tpetra::LookupStatus retVal =
2231  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2232  if (verbose) {
2233  std::ostringstream os;
2234  os << *prefix << "Done; getDirectoryEntries returned "
2235  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2236  << "; ";
2237  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2238  os << endl;
2239  std::cerr << os.str();
2240  }
2241  return retVal;
2242 }
2243 
2244 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2246  using exec_space = typename Node::device_type::execution_space;
2247  if (lgMap_.extent(0) != lgMapHost_.extent(0)) {
2248  Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2249  // NOTE: We check lgMap_ and not glMap_, since the latter can
2250  // be somewhat error prone for contiguous maps
2251 
2252  // create_mirror_view preserves const-ness. create_mirror does not
2253  auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace(), lgMap_);
2254 
2255  // Since this was computed on the default stream, we can copy on the stream and then fence
2256  // the stream
2257  Kokkos::deep_copy(exec_space(), lgMap_host, lgMap_);
2258  exec_space().fence();
2259  lgMapHost_ = lgMap_host;
2260 
2261  // Make host version - when memory spaces match these just do trivial assignment
2262  glMapHost_ = global_to_local_table_host_type(glMap_);
2263  }
2264 }
2265 
2266 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2267 Teuchos::RCP<const Teuchos::Comm<int>>
2269  return comm_;
2270 }
2271 
2272 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2274  checkIsDist() const {
2275  using std::endl;
2276  using Teuchos::as;
2277  using Teuchos::outArg;
2278  using Teuchos::REDUCE_MIN;
2279  using Teuchos::reduceAll;
2280 
2281  const bool verbose = Details::Behavior::verbose("Map");
2282  std::unique_ptr<std::string> prefix;
2283  if (verbose) {
2284  prefix = Details::createPrefix(
2285  comm_.getRawPtr(), "Map", "checkIsDist");
2286  std::ostringstream os;
2287  os << *prefix << "Start" << endl;
2288  std::cerr << os.str();
2289  }
2290 
2291  bool global = false;
2292  if (comm_->getSize() > 1) {
2293  // The communicator has more than one process, but that doesn't
2294  // necessarily mean the Map is distributed.
2295  int localRep = 0;
2296  if (numGlobalElements_ == as<global_size_t>(numLocalElements_)) {
2297  // The number of local elements on this process equals the
2298  // number of global elements.
2299  //
2300  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2301  // duplicates in the global ID list on input (the third Map
2302  // constructor), so that the number of local elements (which
2303  // are not duplicated) on this process could be less than the
2304  // number of global elements, even if this process owns all
2305  // the elements?
2306  localRep = 1;
2307  }
2308  int allLocalRep;
2309  reduceAll<int, int>(*comm_, REDUCE_MIN, localRep, outArg(allLocalRep));
2310  if (allLocalRep != 1) {
2311  // At least one process does not own all the elements.
2312  // This makes the Map a distributed Map.
2313  global = true;
2314  }
2315  }
2316  // If the communicator has only one process, then the Map is not
2317  // distributed.
2318 
2319  if (verbose) {
2320  std::ostringstream os;
2321  os << *prefix << "Done; global=" << (global ? "true" : "false")
2322  << endl;
2323  std::cerr << os.str();
2324  }
2325  return global;
2326 }
2327 
2328 } // namespace Tpetra
2329 
2330 template <class LocalOrdinal, class GlobalOrdinal>
2331 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2332 Tpetra::createLocalMap(const size_t numElements,
2333  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2334  typedef LocalOrdinal LO;
2335  typedef GlobalOrdinal GO;
2336  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2337  return createLocalMapWithNode<LO, GO, NT>(numElements, comm);
2338 }
2339 
2340 template <class LocalOrdinal, class GlobalOrdinal>
2341 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2342 Tpetra::createUniformContigMap(const global_size_t numElements,
2343  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2344  typedef LocalOrdinal LO;
2345  typedef GlobalOrdinal GO;
2346  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2347  return createUniformContigMapWithNode<LO, GO, NT>(numElements, comm);
2348 }
2349 
2350 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2351 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2352 Tpetra::createUniformContigMapWithNode(const global_size_t numElements,
2353  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2354  using Teuchos::rcp;
2356  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal>(0);
2357 
2358  return rcp(new map_type(numElements, indexBase, comm, GloballyDistributed));
2359 }
2360 
2361 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2362 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2363 Tpetra::createLocalMapWithNode(const size_t numElements,
2364  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2365  using Teuchos::rcp;
2366  using Tpetra::global_size_t;
2368  const GlobalOrdinal indexBase = 0;
2369  const global_size_t globalNumElts = static_cast<global_size_t>(numElements);
2370 
2371  return rcp(new map_type(globalNumElts, indexBase, comm, LocallyReplicated));
2372 }
2373 
2374 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2375 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2377  const size_t localNumElements,
2378  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2379  using Teuchos::rcp;
2381  const GlobalOrdinal indexBase = 0;
2382 
2383  return rcp(new map_type(numElements, localNumElements, indexBase, comm));
2384 }
2385 
2386 template <class LocalOrdinal, class GlobalOrdinal>
2387 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2389  const size_t localNumElements,
2390  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2391  typedef LocalOrdinal LO;
2392  typedef GlobalOrdinal GO;
2393  using NT = typename Tpetra::Map<LO, GO>::node_type;
2394 
2395  return Tpetra::createContigMapWithNode<LO, GO, NT>(numElements, localNumElements, comm);
2396 }
2397 
2398 template <class LocalOrdinal, class GlobalOrdinal>
2399 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2400 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2401  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2402  typedef LocalOrdinal LO;
2403  typedef GlobalOrdinal GO;
2404  using NT = typename Tpetra::Map<LO, GO>::node_type;
2405 
2406  return Tpetra::createNonContigMapWithNode<LO, GO, NT>(elementList, comm);
2407 }
2408 
2409 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2410 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2411 Tpetra::createNonContigMapWithNode(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2412  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2413  using Teuchos::rcp;
2415  using GST = Tpetra::global_size_t;
2416  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2417  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2418  // shouldn't be zero, given that the index base is supposed to equal
2419  // the globally min global index?
2420  const GlobalOrdinal indexBase = 0;
2421 
2422  return rcp(new map_type(INV, elementList, indexBase, comm));
2423 }
2424 
2425 template <class LO, class GO, class NT>
2426 Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>
2427 Tpetra::createOneToOne(const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& M) {
2429  using std::cerr;
2430  using std::endl;
2431  using Teuchos::Array;
2432  using Teuchos::ArrayView;
2433  using Teuchos::as;
2434  using Teuchos::rcp;
2435  using map_type = Tpetra::Map<LO, GO, NT>;
2436  using GST = global_size_t;
2437 
2438  const bool verbose = Details::Behavior::verbose("Map");
2439  std::unique_ptr<std::string> prefix;
2440  if (verbose) {
2441  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2442  prefix = Details::createPrefix(
2443  comm.getRawPtr(), "createOneToOne(Map)");
2444  std::ostringstream os;
2445  os << *prefix << "Start" << endl;
2446  cerr << os.str();
2447  }
2448  const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2449  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2450  const int myRank = M->getComm()->getRank();
2451 
2452  // Bypasses for special cases where either M is known to be
2453  // one-to-one, or the one-to-one version of M is easy to compute.
2454  // This is why we take M as an RCP, not as a const reference -- so
2455  // that we can return M itself if it is 1-to-1.
2456  if (!M->isDistributed()) {
2457  // For a locally replicated Map, we assume that users want to push
2458  // all the GIDs to Process 0.
2459 
2460  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2461  // you think it should return, in this special case of a locally
2462  // replicated contiguous Map.
2463  const GST numGlobalEntries = M->getGlobalNumElements();
2464  if (M->isContiguous()) {
2465  const size_t numLocalEntries =
2466  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2467  if (verbose) {
2468  std::ostringstream os;
2469  os << *prefix << "Input is locally replicated & contiguous; "
2470  "numLocalEntries="
2471  << numLocalEntries << endl;
2472  cerr << os.str();
2473  }
2474  auto retMap =
2475  rcp(new map_type(numGlobalEntries, numLocalEntries,
2476  M->getIndexBase(), M->getComm()));
2477  if (verbose) {
2478  std::ostringstream os;
2479  os << *prefix << "Done" << endl;
2480  cerr << os.str();
2481  }
2482  return retMap;
2483  } else {
2484  if (verbose) {
2485  std::ostringstream os;
2486  os << *prefix << "Input is locally replicated & noncontiguous"
2487  << endl;
2488  cerr << os.str();
2489  }
2490  ArrayView<const GO> myGids =
2491  (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2492  auto retMap =
2493  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2494  M->getComm()));
2495  if (verbose) {
2496  std::ostringstream os;
2497  os << *prefix << "Done" << endl;
2498  cerr << os.str();
2499  }
2500  return retMap;
2501  }
2502  } else if (M->isContiguous()) {
2503  if (verbose) {
2504  std::ostringstream os;
2505  os << *prefix << "Input is distributed & contiguous" << endl;
2506  cerr << os.str();
2507  }
2508  // Contiguous, distributed Maps are one-to-one by construction.
2509  // (Locally replicated Maps can be contiguous.)
2510  return M;
2511  } else {
2512  if (verbose) {
2513  std::ostringstream os;
2514  os << *prefix << "Input is distributed & noncontiguous" << endl;
2515  cerr << os.str();
2516  }
2518  const size_t numMyElems = M->getLocalNumElements();
2519  ArrayView<const GO> myElems = M->getLocalElementList();
2520  Array<int> owner_procs_vec(numMyElems);
2521 
2522  if (verbose) {
2523  std::ostringstream os;
2524  os << *prefix << "Call Directory::getDirectoryEntries: ";
2525  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2526  os << endl;
2527  cerr << os.str();
2528  }
2529  directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2530  if (verbose) {
2531  std::ostringstream os;
2532  os << *prefix << "getDirectoryEntries result: ";
2533  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2534  os << endl;
2535  cerr << os.str();
2536  }
2537 
2538  Array<GO> myOwned_vec(numMyElems);
2539  size_t numMyOwnedElems = 0;
2540  for (size_t i = 0; i < numMyElems; ++i) {
2541  const GO GID = myElems[i];
2542  const int owner = owner_procs_vec[i];
2543 
2544  if (myRank == owner) {
2545  myOwned_vec[numMyOwnedElems++] = GID;
2546  }
2547  }
2548  myOwned_vec.resize(numMyOwnedElems);
2549 
2550  if (verbose) {
2551  std::ostringstream os;
2552  os << *prefix << "Create Map: ";
2553  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2554  os << endl;
2555  cerr << os.str();
2556  }
2557  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2558  M->getIndexBase(), M->getComm()));
2559  if (verbose) {
2560  std::ostringstream os;
2561  os << *prefix << "Done" << endl;
2562  cerr << os.str();
2563  }
2564  return retMap;
2565  }
2566 }
2567 
2568 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2569 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2572  using Details::Behavior;
2574  using std::cerr;
2575  using std::endl;
2576  using Teuchos::Array;
2577  using Teuchos::ArrayView;
2578  using Teuchos::RCP;
2579  using Teuchos::rcp;
2580  using Teuchos::toString;
2581  using LO = LocalOrdinal;
2582  using GO = GlobalOrdinal;
2583  using map_type = Tpetra::Map<LO, GO, Node>;
2584 
2585  const bool verbose = Behavior::verbose("Map");
2586  std::unique_ptr<std::string> prefix;
2587  if (verbose) {
2588  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2589  prefix = Details::createPrefix(
2590  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2591  std::ostringstream os;
2592  os << *prefix << "Start" << endl;
2593  cerr << os.str();
2594  }
2595  const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
2596 
2597  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2598  // Maps (which are 1-to-1 by construction).
2599 
2601  if (verbose) {
2602  std::ostringstream os;
2603  os << *prefix << "Initialize Directory" << endl;
2604  cerr << os.str();
2605  }
2606  directory.initialize(*M, tie_break);
2607  if (verbose) {
2608  std::ostringstream os;
2609  os << *prefix << "Done initializing Directory" << endl;
2610  cerr << os.str();
2611  }
2612  size_t numMyElems = M->getLocalNumElements();
2613  ArrayView<const GO> myElems = M->getLocalElementList();
2614  Array<int> owner_procs_vec(numMyElems);
2615  if (verbose) {
2616  std::ostringstream os;
2617  os << *prefix << "Call Directory::getDirectoryEntries: ";
2618  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2619  os << endl;
2620  cerr << os.str();
2621  }
2622  directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2623  if (verbose) {
2624  std::ostringstream os;
2625  os << *prefix << "getDirectoryEntries result: ";
2626  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2627  os << endl;
2628  cerr << os.str();
2629  }
2630 
2631  const int myRank = M->getComm()->getRank();
2632  Array<GO> myOwned_vec(numMyElems);
2633  size_t numMyOwnedElems = 0;
2634  for (size_t i = 0; i < numMyElems; ++i) {
2635  const GO GID = myElems[i];
2636  const int owner = owner_procs_vec[i];
2637  if (myRank == owner) {
2638  myOwned_vec[numMyOwnedElems++] = GID;
2639  }
2640  }
2641  myOwned_vec.resize(numMyOwnedElems);
2642 
2643  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2644  // valid for the new Map. Why can't we reuse it?
2645  const global_size_t GINV =
2646  Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
2647  if (verbose) {
2648  std::ostringstream os;
2649  os << *prefix << "Create Map: ";
2650  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2651  os << endl;
2652  cerr << os.str();
2653  }
2654  RCP<const map_type> retMap(new map_type(GINV, myOwned_vec(), M->getIndexBase(),
2655  M->getComm()));
2656  if (verbose) {
2657  std::ostringstream os;
2658  os << *prefix << "Done" << endl;
2659  cerr << os.str();
2660  }
2661  return retMap;
2662 }
2663 
2664 //
2665 // Explicit instantiation macro
2666 //
2667 // Must be expanded from within the Tpetra namespace!
2668 //
2669 
2671 
2672 #define TPETRA_MAP_INSTANT(LO, GO, NODE) \
2673  \
2674  template class Map<LO, GO, NODE>; \
2675  \
2676  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2677  createLocalMapWithNode<LO, GO, NODE>(const size_t numElements, \
2678  const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2679  \
2680  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2681  createContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2682  const size_t localNumElements, \
2683  const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2684  \
2685  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2686  createNonContigMapWithNode(const Teuchos::ArrayView<const GO>& elementList, \
2687  const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2688  \
2689  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2690  createUniformContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2691  const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2692  \
2693  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2694  createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M); \
2695  \
2696  template Teuchos::RCP<const Map<LO, GO, NODE>> \
2697  createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M, \
2698  const Tpetra::Details::TieBreak<LO, GO>& tie_break);
2699 
2701 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO, GO) \
2702  template Teuchos::RCP<const Map<LO, GO>> \
2703  createLocalMap<LO, GO>(const size_t, const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2704  \
2705  template Teuchos::RCP<const Map<LO, GO>> \
2706  createContigMap<LO, GO>(global_size_t, size_t, \
2707  const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2708  \
2709  template Teuchos::RCP<const Map<LO, GO>> \
2710  createNonContigMap(const Teuchos::ArrayView<const GO>&, \
2711  const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2712  \
2713  template Teuchos::RCP<const Map<LO, GO>> \
2714  createUniformContigMap<LO, GO>(const global_size_t, \
2715  const Teuchos::RCP<const Teuchos::Comm<int>>&);
2716 
2717 #endif // TPETRA_MAP_DEF_HPP
Interface for breaking ties in ownership.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:33
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
LocalOrdinal local_ordinal_type
The type of local indices.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
GlobalOrdinal global_ordinal_type
The type of global indices.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
Declaration of Tpetra::Details::printOnce.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declaration of a function that prints strings from each process.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
bool isUniform() const
Whether the range of global indices is uniform.
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map&#39;s device.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int >> &newComm) const
Replace this Map&#39;s communicator with a subset communicator.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for initializing and finalizing Tpetra.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
static bool verbose()
Whether Tpetra is in verbose mode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
typename device_type::execution_space execution_space
The Kokkos execution space.
A parallel distribution of indices over processes.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
bool isOneToOne() const
Whether the Map is one to one.
void initialize(const map_type &map)
Initialize the Directory with its Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
typename Node::device_type device_type
This class&#39; Kokkos::Device specialization.
LocalGlobal
Enum for local versus global allocation of Map entries.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int >> &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Node node_type
Legacy typedef that will go away at some point.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:69
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.
std::string description() const
Implementation of Teuchos::Describable.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.
Map()
Default constructor (that does nothing).