Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
14 
15 #ifndef TPETRA_MAP_DEF_HPP
16 #define TPETRA_MAP_DEF_HPP
17 
18 #include <memory>
19 #include <sstream>
20 #include <stdexcept>
21 #include <typeinfo>
22 
23 #include "Teuchos_as.hpp"
24 #include "Teuchos_TypeNameTraits.hpp"
25 #include "Teuchos_CommHelpers.hpp"
26 
27 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29 #include "Tpetra_Details_FixedHashTable.hpp"
32 #include "Tpetra_Core.hpp"
33 #include "Tpetra_Util.hpp"
34 #include "Tpetra_Details_mpiIsInitialized.hpp"
35 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38 
39 namespace { // (anonymous)
40 
41  void
42  checkMapInputArray (const char ctorName[],
43  const void* indexList,
44  const size_t indexListSize,
45  const Teuchos::Comm<int>* const comm)
46  {
48 
49  const bool debug = Behavior::debug("Map");
50  if (debug) {
51  using Teuchos::outArg;
52  using Teuchos::REDUCE_MIN;
53  using Teuchos::reduceAll;
54  using std::endl;
55 
56  const int myRank = comm == nullptr ? 0 : comm->getRank ();
57  const bool verbose = Behavior::verbose("Map");
58  std::ostringstream lclErrStrm;
59  int lclSuccess = 1;
60 
61  if (indexListSize != 0 && indexList == nullptr) {
62  lclSuccess = 0;
63  if (verbose) {
64  lclErrStrm << "Proc " << myRank << ": indexList is null, "
65  "but indexListSize=" << indexListSize << " != 0." << endl;
66  }
67  }
68  int gblSuccess = 0; // output argument
69  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
70  if (gblSuccess != 1) {
71  std::ostringstream gblErrStrm;
72  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
73  " detected a problem with the input array "
74  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
75  "of global indices." << endl;
76  if (verbose) {
77  using ::Tpetra::Details::gathervPrint;
78  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
79  }
80  TEUCHOS_TEST_FOR_EXCEPTION
81  (true, std::invalid_argument, gblErrStrm.str ());
82  }
83  }
84  }
85 
86 
87 
88 
89  template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
90  void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal & minMyGID, GlobalOrdinal & maxMyGID, GlobalOrdinal &firstContiguousGID, GlobalOrdinal &lastContiguousGID_val, LocalOrdinal &lastContiguousGID_loc) {
91  using LO = LocalOrdinal;
92  using GO = GlobalOrdinal;
93  using exec_space = typename ViewType::device_type::execution_space;
94  using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO> >;
95  const LO numLocalElements = entryList.extent(0);
96 
97  // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
98  // ride, rather than the other way around
99  typedef typename Kokkos::MinLoc<LO,GO>::value_type minloc_type;
100  minloc_type myMinLoc;
101 
102  // Find the initial sequence of parallel gids
103  // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
104  // We want the one *before* that guy.
105  Kokkos::parallel_reduce(range_policy(0,numLocalElements),KOKKOS_LAMBDA(const LO & i, GO &l_myMin, GO&l_myMax, GO& l_firstCont, minloc_type & l_lastCont){
106  GO entry_0 = entryList[0];
107  GO entry_i = entryList[i];
108 
109  // Easy stuff
110  l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
111  l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
112  l_firstCont = entry_0;
113 
114  if(entry_i - entry_0 != i && l_lastCont.val >= i) {
115  // We're non-contiguous, so the guy before us could be the last contiguous guy
116  l_lastCont.val = i-1;
117  l_lastCont.loc = entryList[i-1];
118  }
119  else if (i == numLocalElements-1 && i < l_lastCont.val) {
120  // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
121  l_lastCont.val = i;
122  l_lastCont.loc = entry_i;
123  }
124 
125  },Kokkos::Min<GO>(minMyGID),Kokkos::Max<GO>(maxMyGID),Kokkos::Min<GO>(firstContiguousGID),Kokkos::MinLoc<LO,GO>(myMinLoc));
126 
127  // This switch is intentional, since we're using MinLoc backwards
128  lastContiguousGID_val = myMinLoc.loc;
129  lastContiguousGID_loc = myMinLoc.val;
130  }
131 
132 
133 } // namespace (anonymous)
134 
135 namespace Tpetra {
136 
137  template <class LocalOrdinal, class GlobalOrdinal, class Node>
139  Map () :
140  comm_ (new Teuchos::SerialComm<int> ()),
141  indexBase_ (0),
142  numGlobalElements_ (0),
143  numLocalElements_ (0),
144  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
145  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
146  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
147  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
148  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
149  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
150  uniform_ (false), // trivially
151  contiguous_ (false),
152  distributed_ (false), // no communicator yet
153  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
154  {
157  }
158 
159 
160  template <class LocalOrdinal, class GlobalOrdinal, class Node>
162  Map (const global_size_t numGlobalElements,
163  const global_ordinal_type indexBase,
164  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
165  const LocalGlobal lOrG) :
166  comm_ (comm),
167  uniform_ (true),
168  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
169  {
170  using Teuchos::as;
171  using Teuchos::broadcast;
172  using Teuchos::outArg;
173  using Teuchos::reduceAll;
174  using Teuchos::REDUCE_MIN;
175  using Teuchos::REDUCE_MAX;
176  using Teuchos::typeName;
177  using std::endl;
178  using GO = global_ordinal_type;
179  using GST = global_size_t;
180  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
181  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
182  const char exPfx[] =
183  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
184 
185  const bool debug = Details::Behavior::debug("Map");
186  const bool verbose = Details::Behavior::verbose("Map");
187  std::unique_ptr<std::string> prefix;
188  if (verbose) {
189  prefix = Details::createPrefix(
190  comm_.getRawPtr(), "Map", funcName);
191  std::ostringstream os;
192  os << *prefix << "Start" << endl;
193  std::cerr << os.str();
194  }
197 
198  // In debug mode only, check whether numGlobalElements and
199  // indexBase are the same over all processes in the communicator.
200  if (debug) {
201  GST proc0NumGlobalElements = numGlobalElements;
202  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
203  GST minNumGlobalElements = numGlobalElements;
204  GST maxNumGlobalElements = numGlobalElements;
205  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
206  outArg(minNumGlobalElements));
207  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
208  outArg(maxNumGlobalElements));
209  TEUCHOS_TEST_FOR_EXCEPTION
210  (minNumGlobalElements != maxNumGlobalElements ||
211  numGlobalElements != minNumGlobalElements,
212  std::invalid_argument, exPfx << "All processes must "
213  "provide the same number of global elements. Process 0 set "
214  "numGlobalElements="<< proc0NumGlobalElements << ". The "
215  "calling process " << comm->getRank() << " set "
216  "numGlobalElements=" << numGlobalElements << ". The min "
217  "and max values over all processes are "
218  << minNumGlobalElements << " resp. " << maxNumGlobalElements
219  << ".");
220 
221  GO proc0IndexBase = indexBase;
222  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
223  GO minIndexBase = indexBase;
224  GO maxIndexBase = indexBase;
225  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
226  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
227  TEUCHOS_TEST_FOR_EXCEPTION
228  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
229  std::invalid_argument, exPfx << "All processes must "
230  "provide the same indexBase argument. Process 0 set "
231  "indexBase=" << proc0IndexBase << ". The calling process "
232  << comm->getRank() << " set indexBase=" << indexBase
233  << ". The min and max values over all processes are "
234  << minIndexBase << " resp. " << maxIndexBase << ".");
235  }
236 
237  // Distribute the elements across the processes in the given
238  // communicator so that global IDs (GIDs) are
239  //
240  // - Nonoverlapping (only one process owns each GID)
241  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
242  // adjacent GIDs differ by more than one)
243  // - As evenly distributed as possible (the numbers of GIDs on two
244  // different processes do not differ by more than one)
245 
246  // All processes have the same numGlobalElements, but we still
247  // need to check that it is valid. numGlobalElements must be
248  // positive and not the "invalid" value (GSTI).
249  //
250  // This comparison looks funny, but it avoids compiler warnings
251  // for comparing unsigned integers (numGlobalElements_in is a
252  // GST, which is unsigned) while still working if we
253  // later decide to make GST signed.
254  TEUCHOS_TEST_FOR_EXCEPTION(
255  (numGlobalElements < 1 && numGlobalElements != 0),
256  std::invalid_argument, exPfx << "numGlobalElements (= "
257  << numGlobalElements << ") must be nonnegative.");
258 
259  TEUCHOS_TEST_FOR_EXCEPTION
260  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
261  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
262  "Tpetra::global_size_t>::invalid(). This version of the "
263  "constructor requires a valid value of numGlobalElements. "
264  "You probably mistook this constructor for the \"contiguous "
265  "nonuniform\" constructor, which can compute the global "
266  "number of elements for you if you set numGlobalElements to "
267  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
268 
269  size_t numLocalElements = 0; // will set below
270  if (lOrG == GloballyDistributed) {
271  // Compute numLocalElements:
272  //
273  // If numGlobalElements == numProcs * B + remainder,
274  // then Proc r gets B+1 elements if r < remainder,
275  // and B elements if r >= remainder.
276  //
277  // This strategy is valid for any value of numGlobalElements and
278  // numProcs, including the following border cases:
279  // - numProcs == 1
280  // - numLocalElements < numProcs
281  //
282  // In the former case, remainder == 0 && numGlobalElements ==
283  // numLocalElements. In the latter case, remainder ==
284  // numGlobalElements && numLocalElements is either 0 or 1.
285  const GST numProcs = static_cast<GST> (comm_->getSize ());
286  const GST myRank = static_cast<GST> (comm_->getRank ());
287  const GST quotient = numGlobalElements / numProcs;
288  const GST remainder = numGlobalElements - quotient * numProcs;
289 
290  GO startIndex;
291  if (myRank < remainder) {
292  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
293  // myRank was originally an int, so it should never overflow
294  // reasonable GO types.
295  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
296  } else {
297  numLocalElements = as<size_t> (quotient);
298  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
299  as<GO> (remainder);
300  }
301 
302  minMyGID_ = indexBase + startIndex;
303  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
304  minAllGID_ = indexBase;
305  maxAllGID_ = indexBase + numGlobalElements - 1;
306  distributed_ = (numProcs > 1);
307  }
308  else { // lOrG == LocallyReplicated
309  numLocalElements = as<size_t> (numGlobalElements);
310  minMyGID_ = indexBase;
311  maxMyGID_ = indexBase + numGlobalElements - 1;
312  distributed_ = false;
313  }
314 
315  minAllGID_ = indexBase;
316  maxAllGID_ = indexBase + numGlobalElements - 1;
317  indexBase_ = indexBase;
318  numGlobalElements_ = numGlobalElements;
319  numLocalElements_ = numLocalElements;
320  firstContiguousGID_ = minMyGID_;
321  lastContiguousGID_ = maxMyGID_;
322  contiguous_ = true;
323 
324  // Create the Directory on demand in getRemoteIndexList().
325  //setupDirectory ();
326 
327  if (verbose) {
328  std::ostringstream os;
329  os << *prefix << "Done" << endl;
330  std::cerr << os.str();
331  }
332  }
333 
334 
335  template <class LocalOrdinal, class GlobalOrdinal, class Node>
337  Map (const global_size_t numGlobalElements,
338  const size_t numLocalElements,
339  const global_ordinal_type indexBase,
340  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
341  comm_ (comm),
342  uniform_ (false),
343  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
344  {
345  using Teuchos::as;
346  using Teuchos::broadcast;
347  using Teuchos::outArg;
348  using Teuchos::reduceAll;
349  using Teuchos::REDUCE_MIN;
350  using Teuchos::REDUCE_MAX;
351  using Teuchos::REDUCE_SUM;
352  using Teuchos::scan;
353  using std::endl;
354  using GO = global_ordinal_type;
355  using GST = global_size_t;
356  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
357  const char funcName[] =
358  "Map(gblNumInds,lclNumInds,indexBase,comm)";
359  const char exPfx[] =
360  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
361  const char suffix[] =
362  ". Please report this bug to the Tpetra developers.";
363 
364  const bool debug = Details::Behavior::debug("Map");
365  const bool verbose = Details::Behavior::verbose("Map");
366  std::unique_ptr<std::string> prefix;
367  if (verbose) {
368  prefix = Details::createPrefix(
369  comm_.getRawPtr(), "Map", funcName);
370  std::ostringstream os;
371  os << *prefix << "Start" << endl;
372  std::cerr << os.str();
373  }
376 
377  // Global sum of numLocalElements over all processes.
378  // Keep this for later debug checks.
379  GST debugGlobalSum {};
380  if (debug) {
381  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
382  numGlobalElements, numLocalElements, indexBase, comm);
383  }
384 
385  // Distribute the elements across the nodes so that they are
386  // - non-overlapping
387  // - contiguous
388 
389  // This differs from the first Map constructor (that only takes a
390  // global number of elements) in that the user has specified the
391  // number of local elements, so that the elements are not
392  // (necessarily) evenly distributed over the processes.
393 
394  // Compute my local offset. This is an inclusive scan, so to get
395  // the final offset, we subtract off the input.
396  GO scanResult = 0;
397  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
398  const GO myOffset = scanResult - numLocalElements;
399 
400  if (numGlobalElements != GSTI) {
401  numGlobalElements_ = numGlobalElements; // Use the user's value.
402  }
403  else {
404  // Inclusive scan means that the last process has the final sum.
405  // Rather than doing a reduceAll to get the sum of
406  // numLocalElements, we can just have the last process broadcast
407  // its result. That saves us a round of log(numProcs) messages.
408  const int numProcs = comm->getSize ();
409  GST globalSum = scanResult;
410  if (numProcs > 1) {
411  broadcast (*comm, numProcs - 1, outArg (globalSum));
412  }
413  numGlobalElements_ = globalSum;
414 
415  if (debug) {
416  // No need for an all-reduce here; both come from collectives.
417  TEUCHOS_TEST_FOR_EXCEPTION
418  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
419  "globalSum = " << globalSum << " != debugGlobalSum = " <<
420  debugGlobalSum << suffix);
421  }
422  }
423  numLocalElements_ = numLocalElements;
424  indexBase_ = indexBase;
425  minAllGID_ = (numGlobalElements_ == 0) ?
426  std::numeric_limits<GO>::max () :
427  indexBase;
428  maxAllGID_ = (numGlobalElements_ == 0) ?
429  std::numeric_limits<GO>::lowest () :
430  indexBase + GO(numGlobalElements_) - GO(1);
431  minMyGID_ = (numLocalElements_ == 0) ?
432  std::numeric_limits<GO>::max () :
433  indexBase + GO(myOffset);
434  maxMyGID_ = (numLocalElements_ == 0) ?
435  std::numeric_limits<GO>::lowest () :
436  indexBase + myOffset + GO(numLocalElements) - GO(1);
437  firstContiguousGID_ = minMyGID_;
438  lastContiguousGID_ = maxMyGID_;
439  contiguous_ = true;
440  distributed_ = checkIsDist ();
441 
442  // Create the Directory on demand in getRemoteIndexList().
443  //setupDirectory ();
444 
445  if (verbose) {
446  std::ostringstream os;
447  os << *prefix << "Done" << endl;
448  std::cerr << os.str();
449  }
450  }
451 
452  template <class LocalOrdinal, class GlobalOrdinal, class Node>
456  const char errorMessagePrefix[],
457  const global_size_t numGlobalElements,
458  const size_t numLocalElements,
459  const global_ordinal_type indexBase,
460  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
461  {
462  const bool debug = Details::Behavior::debug("Map");
463  if (! debug) {
464  return global_size_t(0);
465  }
466 
467  using Teuchos::broadcast;
468  using Teuchos::outArg;
469  using Teuchos::ptr;
470  using Teuchos::REDUCE_MAX;
471  using Teuchos::REDUCE_MIN;
472  using Teuchos::REDUCE_SUM;
473  using Teuchos::reduceAll;
474  using GO = global_ordinal_type;
475  using GST = global_size_t;
476  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
477 
478  // The user has specified the distribution of indices over the
479  // processes. The distribution is not necessarily contiguous or
480  // equally shared over the processes.
481  //
482  // We assume that the number of local elements can be stored in a
483  // size_t. The instance member numLocalElements_ is a size_t, so
484  // this variable and that should have the same type.
485 
486  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
487  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
488  outArg (debugGlobalSum));
489  // In debug mode only, check whether numGlobalElements and
490  // indexBase are the same over all processes in the communicator.
491  {
492  GST proc0NumGlobalElements = numGlobalElements;
493  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
494  GST minNumGlobalElements = numGlobalElements;
495  GST maxNumGlobalElements = numGlobalElements;
496  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
497  outArg (minNumGlobalElements));
498  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
499  outArg (maxNumGlobalElements));
500  TEUCHOS_TEST_FOR_EXCEPTION
501  (minNumGlobalElements != maxNumGlobalElements ||
502  numGlobalElements != minNumGlobalElements,
503  std::invalid_argument, errorMessagePrefix << "All processes "
504  "must provide the same number of global elements, even if "
505  "that argument is "
506  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
507  "(which signals that the Map should compute the global "
508  "number of elements). Process 0 set numGlobalElements"
509  "=" << proc0NumGlobalElements << ". The calling process "
510  << comm->getRank() << " set numGlobalElements=" <<
511  numGlobalElements << ". The min and max values over all "
512  "processes are " << minNumGlobalElements << " resp. " <<
513  maxNumGlobalElements << ".");
514 
515  GO proc0IndexBase = indexBase;
516  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
517  GO minIndexBase = indexBase;
518  GO maxIndexBase = indexBase;
519  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
520  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
521  TEUCHOS_TEST_FOR_EXCEPTION
522  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
523  std::invalid_argument, errorMessagePrefix <<
524  "All processes must provide the same indexBase argument. "
525  "Process 0 set indexBase = " << proc0IndexBase << ". The "
526  "calling process " << comm->getRank() << " set indexBase="
527  << indexBase << ". The min and max values over all "
528  "processes are " << minIndexBase << " resp. " << maxIndexBase
529  << ".");
530 
531  // Make sure that the sum of numLocalElements over all processes
532  // equals numGlobalElements.
533  TEUCHOS_TEST_FOR_EXCEPTION
534  (numGlobalElements != GSTI &&
535  debugGlobalSum != numGlobalElements, std::invalid_argument,
536  errorMessagePrefix << "The sum of each process' number of "
537  "indices over all processes, " << debugGlobalSum << ", != "
538  << "numGlobalElements=" << numGlobalElements << ". If you "
539  "would like this constructor to compute numGlobalElements "
540  "for you, you may set numGlobalElements="
541  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
542  "on input. Please note that this is NOT necessarily -1.");
543  }
544  return debugGlobalSum;
545  }
546 
547  template <class LocalOrdinal, class GlobalOrdinal, class Node>
548  void
549  Map<LocalOrdinal,GlobalOrdinal,Node>::
550  initWithNonownedHostIndexList(
551  const char errorMessagePrefix[],
552  const global_size_t numGlobalElements,
553  const Kokkos::View<const global_ordinal_type*,
554  Kokkos::LayoutLeft,
555  Kokkos::HostSpace,
556  Kokkos::MemoryUnmanaged>& entryList_host,
557  const global_ordinal_type indexBase,
558  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
559  {
560  Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
561 
562  using Kokkos::LayoutLeft;
563  using Kokkos::subview;
564  using Kokkos::View;
565  using Kokkos::view_alloc;
566  using Kokkos::WithoutInitializing;
567  using Teuchos::as;
568  using Teuchos::broadcast;
569  using Teuchos::outArg;
570  using Teuchos::ptr;
571  using Teuchos::REDUCE_MAX;
572  using Teuchos::REDUCE_MIN;
573  using Teuchos::REDUCE_SUM;
574  using Teuchos::reduceAll;
575  using LO = local_ordinal_type;
576  using GO = global_ordinal_type;
577  using GST = global_size_t;
578  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
579  // Make sure that Kokkos has been initialized (Github Issue #513).
580  TEUCHOS_TEST_FOR_EXCEPTION
581  (! Kokkos::is_initialized (), std::runtime_error,
582  errorMessagePrefix << "The Kokkos execution space "
583  << Teuchos::TypeNameTraits<execution_space>::name()
584  << " has not been initialized. "
585  "Please initialize it before creating a Map.")
586 
587  // The user has specified the distribution of indices over the
588  // processes, via the input array of global indices on each
589  // process. The distribution is not necessarily contiguous or
590  // equally shared over the processes.
591 
592  // The length of the input array on this process is the number of
593  // local indices to associate with this process, even though the
594  // input array contains global indices. We assume that the number
595  // of local indices on a process can be stored in a size_t;
596  // numLocalElements_ is a size_t, so this variable and that should
597  // have the same type.
598  const size_t numLocalElements(entryList_host.size());
599 
600  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
601  numLocalElements, indexBase, comm);
602 
603  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
604  // reduction is redundant, since the directory Map will have to do
605  // the same thing. Thus, we could do the scan and broadcast for
606  // the directory Map here, and give the computed offsets to the
607  // directory Map's constructor. However, a reduction costs less
608  // than a scan and broadcast, so this still saves time if users of
609  // this Map don't ever need the Directory (i.e., if they never
610  // call getRemoteIndexList on this Map).
611  if (numGlobalElements != GSTI) {
612  numGlobalElements_ = numGlobalElements; // Use the user's value.
613  }
614  else { // The user wants us to compute the sum.
615  reduceAll(*comm, REDUCE_SUM,
616  static_cast<GST>(numLocalElements),
617  outArg(numGlobalElements_));
618  }
619 
620  // mfh 20 Feb 2013: We've never quite done the right thing for
621  // duplicate GIDs here. Duplicate GIDs have always been counted
622  // distinctly in numLocalElements_, and thus should get a
623  // different LID. However, we've always used std::map or a hash
624  // table for the GID -> LID lookup table, so distinct GIDs always
625  // map to the same LID. Furthermore, the order of the input GID
626  // list matters, so it's not desirable to sort for determining
627  // uniqueness.
628  //
629  // I've chosen for now to write this code as if the input GID list
630  // contains no duplicates. If this is not desired, we could use
631  // the lookup table itself to determine uniqueness: If we haven't
632  // seen the GID before, it gets a new LID and it's added to the
633  // LID -> GID and GID -> LID tables. If we have seen the GID
634  // before, it doesn't get added to either table. I would
635  // implement this, but it would cost more to do the double lookups
636  // in the table (one to check, and one to insert).
637  //
638  // More importantly, since we build the GID -> LID table in (a
639  // thread-) parallel (way), the order in which duplicate GIDs may
640  // get inserted is not defined. This would make the assignment of
641  // LID to GID nondeterministic.
642 
643  numLocalElements_ = numLocalElements;
644  indexBase_ = indexBase;
645 
646  minMyGID_ = indexBase_;
647  maxMyGID_ = indexBase_;
648 
649  // NOTE (mfh 27 May 2015): While finding the initial contiguous
650  // GID range requires looking at all the GIDs in the range,
651  // dismissing an interval of GIDs only requires looking at the
652  // first and last GIDs. Thus, we could do binary search backwards
653  // from the end in order to catch the common case of a contiguous
654  // interval followed by noncontiguous entries. On the other hand,
655  // we could just expose this case explicitly as yet another Map
656  // constructor, and avoid the trouble of detecting it.
657  if (numLocalElements_ > 0) {
658  // Find contiguous GID range, with the restriction that the
659  // beginning of the range starts with the first entry. While
660  // doing so, fill in the LID -> GID table.
661  typename decltype (lgMap_)::non_const_type lgMap
662  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
663  auto lgMap_host =
664  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
665 
666  // The input array entryList_host is already on host, so we
667  // don't need to take a host view of it.
668  // auto entryList_host =
669  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
670  // Kokkos::deep_copy (entryList_host, entryList);
671 
672  firstContiguousGID_ = entryList_host[0];
673  lastContiguousGID_ = firstContiguousGID_+1;
674 
675  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
676  // anyway, so we have to look at them all. The logical way to
677  // find the first noncontiguous entry would thus be to "reduce,"
678  // where the local reduction result is whether entryList[i] + 1
679  // == entryList[i+1].
680 
681  lgMap_host[0] = firstContiguousGID_;
682  size_t i = 1;
683  for ( ; i < numLocalElements_; ++i) {
684  const GO curGid = entryList_host[i];
685  const LO curLid = as<LO> (i);
686 
687  if (lastContiguousGID_ != curGid) break;
688 
689  // Add the entry to the LID->GID table only after we know that
690  // the current GID is in the initial contiguous sequence, so
691  // that we don't repeat adding it in the first iteration of
692  // the loop below over the remaining noncontiguous GIDs.
693  lgMap_host[curLid] = curGid;
694  ++lastContiguousGID_;
695  }
696  --lastContiguousGID_;
697  // NOTE: i is the first non-contiguous index.
698 
699  // [firstContiguousGID_, lastContigousGID_] is the initial
700  // sequence of contiguous GIDs. We can start the min and max
701  // GID using this range.
702  minMyGID_ = firstContiguousGID_;
703  maxMyGID_ = lastContiguousGID_;
704 
705  // Compute the GID -> LID lookup table, _not_ including the
706  // initial sequence of contiguous GIDs.
707  LO firstNonContiguous_loc=i;
708  {
709 
710  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
711  auto nonContigGids_host = subview (entryList_host, ncRange);
712  TEUCHOS_TEST_FOR_EXCEPTION
713  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
714  static_cast<size_t> (entryList_host.extent (0) - i),
715  std::logic_error, "Tpetra::Map noncontiguous constructor: "
716  "nonContigGids_host.extent(0) = "
717  << nonContigGids_host.extent (0)
718  << " != entryList_host.extent(0) - i = "
719  << (entryList_host.extent (0) - i) << " = "
720  << entryList_host.extent (0) << " - " << i
721  << ". Please report this bug to the Tpetra developers.");
722 
723  // FixedHashTable's constructor expects an owned device View,
724  // so we must deep-copy the subview of the input indices.
725  View<GO*, LayoutLeft, device_type>
726  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
727  nonContigGids_host.size ());
728 
729 
730  // DEEP_COPY REVIEW - HOST-TO-DEVICE
731  Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
732  Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
733 
734  glMap_ = global_to_local_table_type(nonContigGids,
735  firstNonContiguous_loc);
736  // Make host version - when memory spaces match these just do trivial assignment
737  glMapHost_ = global_to_local_table_host_type(glMap_);
738  }
739 
740  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
741  // table above, we have to look at all the (noncontiguous) input
742  // indices anyway. Thus, why not have the constructor compute
743  // and return the min and max?
744 
745  for ( ; i < numLocalElements_; ++i) {
746  const GO curGid = entryList_host[i];
747  const LO curLid = static_cast<LO> (i);
748  lgMap_host[curLid] = curGid; // LID -> GID table
749 
750  // While iterating through entryList, we compute its
751  // (process-local) min and max elements.
752  if (curGid < minMyGID_) {
753  minMyGID_ = curGid;
754  }
755  if (curGid > maxMyGID_) {
756  maxMyGID_ = curGid;
757  }
758  }
759 
760  // We filled lgMap on host above; now sync back to device.
761  // DEEP_COPY REVIEW - HOST-TO-DEVICE
762  Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
763 
764  // "Commit" the local-to-global lookup table we filled in above.
765  lgMap_ = lgMap;
766  // We've already created this, so use it.
767  lgMapHost_ = lgMap_host;
768 
769 
770  }
771  else {
772  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
773  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
774  // This insures tests for GIDs in the range
775  // [firstContiguousGID_, lastContiguousGID_] fail for processes
776  // with no local elements.
777  firstContiguousGID_ = indexBase_+1;
778  lastContiguousGID_ = indexBase_;
779  // glMap_ was default constructed, so it's already empty.
780 
781  }
782 
783 
784 
785 
786  // Compute the min and max of all processes' GIDs. If
787  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
788  // are both indexBase_. This is wrong, but fixing it would
789  // require either a fancy sparse all-reduce, or a custom reduction
790  // operator that ignores invalid values ("invalid" means
791  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
792  //
793  // Also, while we're at it, use the same all-reduce to figure out
794  // if the Map is distributed. "Distributed" means that there is
795  // at least one process with a number of local elements less than
796  // the number of global elements.
797  //
798  // We're computing the min and max of all processes' GIDs using a
799  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
800  // and y are signed). (This lets us combine the min and max into
801  // a single all-reduce.) If each process sets localDist=1 if its
802  // number of local elements is strictly less than the number of
803  // global elements, and localDist=0 otherwise, then a MAX
804  // all-reduce on localDist tells us if the Map is distributed (1
805  // if yes, 0 if no). Thus, we can append localDist onto the end
806  // of the data and get the global result from the all-reduce.
807  if (std::numeric_limits<GO>::is_signed) {
808  // Does my process NOT own all the elements?
809  const GO localDist =
810  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
811 
812  GO minMaxInput[3];
813  minMaxInput[0] = -minMyGID_;
814  minMaxInput[1] = maxMyGID_;
815  minMaxInput[2] = localDist;
816 
817  GO minMaxOutput[3];
818  minMaxOutput[0] = 0;
819  minMaxOutput[1] = 0;
820  minMaxOutput[2] = 0;
821  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
822  minAllGID_ = -minMaxOutput[0];
823  maxAllGID_ = minMaxOutput[1];
824  const GO globalDist = minMaxOutput[2];
825  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
826  }
827  else { // unsigned; use two reductions
828  // This is always correct, no matter the signedness of GO.
829  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
830  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
831  distributed_ = checkIsDist ();
832  }
833 
834  contiguous_ = false; // "Contiguous" is conservative.
835 
836  TEUCHOS_TEST_FOR_EXCEPTION(
837  minAllGID_ < indexBase_,
838  std::invalid_argument,
839  "Tpetra::Map constructor (noncontiguous): "
840  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
841  "less than the given indexBase = " << indexBase_ << ".");
842 
843  // Create the Directory on demand in getRemoteIndexList().
844  //setupDirectory ();
845  }
846 
847  template <class LocalOrdinal, class GlobalOrdinal, class Node>
849  Map (const global_size_t numGlobalElements,
850  const GlobalOrdinal indexList[],
851  const LocalOrdinal indexListSize,
852  const GlobalOrdinal indexBase,
853  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
854  comm_ (comm),
855  uniform_ (false),
856  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
857  {
858  using std::endl;
859  const char funcName[] =
860  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
861 
862  const bool verbose = Details::Behavior::verbose("Map");
863  std::unique_ptr<std::string> prefix;
864  if (verbose) {
865  prefix = Details::createPrefix(
866  comm_.getRawPtr(), "Map", funcName);
867  std::ostringstream os;
868  os << *prefix << "Start" << endl;
869  std::cerr << os.str();
870  }
874  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
875  indexList, static_cast<size_t> (indexListSize),
876  comm.getRawPtr ());
877  // Not quite sure if I trust all code to behave correctly if the
878  // pointer is nonnull but the array length is nonzero, so I'll
879  // make sure the raw pointer is null if the length is zero.
880  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
881  Kokkos::View<const GlobalOrdinal*,
882  Kokkos::LayoutLeft,
883  Kokkos::HostSpace,
884  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
885  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
886  indexBase, comm);
887  if (verbose) {
888  std::ostringstream os;
889  os << *prefix << "Done" << endl;
890  std::cerr << os.str();
891  }
892  }
893 
894  template <class LocalOrdinal, class GlobalOrdinal, class Node>
896  Map (const global_size_t numGlobalElements,
897  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
898  const GlobalOrdinal indexBase,
899  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
900  comm_ (comm),
901  uniform_ (false),
902  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
903  {
904  using std::endl;
905  const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
906 
907  const bool verbose = Details::Behavior::verbose("Map");
908  std::unique_ptr<std::string> prefix;
909  if (verbose) {
910  prefix = Details::createPrefix(
911  comm_.getRawPtr(), "Map", funcName);
912  std::ostringstream os;
913  os << *prefix << "Start" << endl;
914  std::cerr << os.str();
915  }
919  const size_t numLclInds = static_cast<size_t> (entryList.size ());
920  checkMapInputArray ("(GST, ArrayView, GO, comm)",
921  entryList.getRawPtr (), numLclInds,
922  comm.getRawPtr ());
923  // Not quite sure if I trust both ArrayView and View to behave
924  // correctly if the pointer is nonnull but the array length is
925  // nonzero, so I'll make sure it's null if the length is zero.
926  const GlobalOrdinal* const indsRaw =
927  numLclInds == 0 ? NULL : entryList.getRawPtr ();
928  Kokkos::View<const GlobalOrdinal*,
929  Kokkos::LayoutLeft,
930  Kokkos::HostSpace,
931  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
932  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
933  indexBase, comm);
934  if (verbose) {
935  std::ostringstream os;
936  os << *prefix << "Done" << endl;
937  std::cerr << os.str();
938  }
939  }
940 
941 
942  template <class LocalOrdinal, class GlobalOrdinal, class Node>
944  Map (const global_size_t numGlobalElements,
945  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
946  const GlobalOrdinal indexBase,
947  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
948  comm_ (comm),
949  uniform_ (false),
950  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
951  {
952  using Kokkos::LayoutLeft;
953  using Kokkos::subview;
954  using Kokkos::View;
955  using Kokkos::view_alloc;
956  using Kokkos::WithoutInitializing;
957  using Teuchos::arcp;
958  using Teuchos::ArrayView;
959  using Teuchos::as;
960  using Teuchos::broadcast;
961  using Teuchos::outArg;
962  using Teuchos::ptr;
963  using Teuchos::REDUCE_MAX;
964  using Teuchos::REDUCE_MIN;
965  using Teuchos::REDUCE_SUM;
966  using Teuchos::reduceAll;
967  using Teuchos::typeName;
968  using std::endl;
969  using LO = local_ordinal_type;
970  using GO = global_ordinal_type;
971  using GST = global_size_t;
972  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
973  const char funcName[] =
974  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
975 
976  const bool verbose = Details::Behavior::verbose("Map");
977  std::unique_ptr<std::string> prefix;
978  if (verbose) {
979  prefix = Details::createPrefix(
980  comm_.getRawPtr(), "Map", funcName);
981  std::ostringstream os;
982  os << *prefix << "Start" << endl;
983  std::cerr << os.str();
984  }
988  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
989  entryList.data (),
990  static_cast<size_t> (entryList.extent (0)),
991  comm.getRawPtr ());
992 
993  // The user has specified the distribution of indices over the
994  // processes, via the input array of global indices on each
995  // process. The distribution is not necessarily contiguous or
996  // equally shared over the processes.
997 
998  // The length of the input array on this process is the number of
999  // local indices to associate with this process, even though the
1000  // input array contains global indices. We assume that the number
1001  // of local indices on a process can be stored in a size_t;
1002  // numLocalElements_ is a size_t, so this variable and that should
1003  // have the same type.
1004  const size_t numLocalElements(entryList.size());
1005 
1006  initialNonuniformDebugCheck(funcName, numGlobalElements,
1007  numLocalElements, indexBase, comm);
1008 
1009  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
1010  // reduction is redundant, since the directory Map will have to do
1011  // the same thing. Thus, we could do the scan and broadcast for
1012  // the directory Map here, and give the computed offsets to the
1013  // directory Map's constructor. However, a reduction costs less
1014  // than a scan and broadcast, so this still saves time if users of
1015  // this Map don't ever need the Directory (i.e., if they never
1016  // call getRemoteIndexList on this Map).
1017  if (numGlobalElements != GSTI) {
1018  numGlobalElements_ = numGlobalElements; // Use the user's value.
1019  }
1020  else { // The user wants us to compute the sum.
1021  reduceAll(*comm, REDUCE_SUM,
1022  static_cast<GST>(numLocalElements),
1023  outArg(numGlobalElements_));
1024  }
1025 
1026 
1027  // mfh 20 Feb 2013: We've never quite done the right thing for
1028  // duplicate GIDs here. Duplicate GIDs have always been counted
1029  // distinctly in numLocalElements_, and thus should get a
1030  // different LID. However, we've always used std::map or a hash
1031  // table for the GID -> LID lookup table, so distinct GIDs always
1032  // map to the same LID. Furthermore, the order of the input GID
1033  // list matters, so it's not desirable to sort for determining
1034  // uniqueness.
1035  //
1036  // I've chosen for now to write this code as if the input GID list
1037  // contains no duplicates. If this is not desired, we could use
1038  // the lookup table itself to determine uniqueness: If we haven't
1039  // seen the GID before, it gets a new LID and it's added to the
1040  // LID -> GID and GID -> LID tables. If we have seen the GID
1041  // before, it doesn't get added to either table. I would
1042  // implement this, but it would cost more to do the double lookups
1043  // in the table (one to check, and one to insert).
1044  //
1045  // More importantly, since we build the GID -> LID table in (a
1046  // thread-) parallel (way), the order in which duplicate GIDs may
1047  // get inserted is not defined. This would make the assignment of
1048  // LID to GID nondeterministic.
1049 
1050  numLocalElements_ = numLocalElements;
1051  indexBase_ = indexBase;
1052 
1053  minMyGID_ = indexBase_;
1054  maxMyGID_ = indexBase_;
1055 
1056  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1057  // GID range requires looking at all the GIDs in the range,
1058  // dismissing an interval of GIDs only requires looking at the
1059  // first and last GIDs. Thus, we could do binary search backwards
1060  // from the end in order to catch the common case of a contiguous
1061  // interval followed by noncontiguous entries. On the other hand,
1062  // we could just expose this case explicitly as yet another Map
1063  // constructor, and avoid the trouble of detecting it.
1064  if (numLocalElements_ > 0) {
1065  // Find contiguous GID range, with the restriction that the
1066  // beginning of the range starts with the first entry. While
1067  // doing so, fill in the LID -> GID table.
1068  typename decltype (lgMap_)::non_const_type lgMap
1069  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1070 
1071  // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1072  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1073  Kokkos::deep_copy(typename device_type::execution_space(),lgMap,entryList);
1074  LO lastContiguousGID_loc;
1075  computeConstantsOnDevice(entryList,minMyGID_,maxMyGID_,firstContiguousGID_,lastContiguousGID_,lastContiguousGID_loc);
1076  LO firstNonContiguous_loc = lastContiguousGID_loc+1;
1077  auto nonContigGids = Kokkos::subview(entryList,std::pair<size_t,size_t>(firstNonContiguous_loc,entryList.extent(0)));
1078 
1079  // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1080  glMap_ = global_to_local_table_type(nonContigGids,
1081  firstNonContiguous_loc);
1082 
1083  // "Commit" the local-to-global lookup table we filled in above.
1084  lgMap_ = lgMap;
1085 
1086  }
1087  else {
1088  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1089  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1090  // This insures tests for GIDs in the range
1091  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1092  // with no local elements.
1093  firstContiguousGID_ = indexBase_+1;
1094  lastContiguousGID_ = indexBase_;
1095  // glMap_ was default constructed, so it's already empty.
1096  }
1097 
1098 
1099  // Compute the min and max of all processes' GIDs. If
1100  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1101  // are both indexBase_. This is wrong, but fixing it would
1102  // require either a fancy sparse all-reduce, or a custom reduction
1103  // operator that ignores invalid values ("invalid" means
1104  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1105  //
1106  // Also, while we're at it, use the same all-reduce to figure out
1107  // if the Map is distributed. "Distributed" means that there is
1108  // at least one process with a number of local elements less than
1109  // the number of global elements.
1110  //
1111  // We're computing the min and max of all processes' GIDs using a
1112  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1113  // and y are signed). (This lets us combine the min and max into
1114  // a single all-reduce.) If each process sets localDist=1 if its
1115  // number of local elements is strictly less than the number of
1116  // global elements, and localDist=0 otherwise, then a MAX
1117  // all-reduce on localDist tells us if the Map is distributed (1
1118  // if yes, 0 if no). Thus, we can append localDist onto the end
1119  // of the data and get the global result from the all-reduce.
1120  if (std::numeric_limits<GO>::is_signed) {
1121  // Does my process NOT own all the elements?
1122  const GO localDist =
1123  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1124 
1125  GO minMaxInput[3];
1126  minMaxInput[0] = -minMyGID_;
1127  minMaxInput[1] = maxMyGID_;
1128  minMaxInput[2] = localDist;
1129 
1130  GO minMaxOutput[3];
1131  minMaxOutput[0] = 0;
1132  minMaxOutput[1] = 0;
1133  minMaxOutput[2] = 0;
1134  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1135  minAllGID_ = -minMaxOutput[0];
1136  maxAllGID_ = minMaxOutput[1];
1137  const GO globalDist = minMaxOutput[2];
1138  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1139  }
1140  else { // unsigned; use two reductions
1141  // This is always correct, no matter the signedness of GO.
1142  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1143  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1144  distributed_ = checkIsDist ();
1145  }
1146 
1147  contiguous_ = false; // "Contiguous" is conservative.
1148 
1149  TEUCHOS_TEST_FOR_EXCEPTION(
1150  minAllGID_ < indexBase_,
1151  std::invalid_argument,
1152  "Tpetra::Map constructor (noncontiguous): "
1153  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1154  "less than the given indexBase = " << indexBase_ << ".");
1155 
1156  // Create the Directory on demand in getRemoteIndexList().
1157  //setupDirectory ();
1158 
1159  if (verbose) {
1160  std::ostringstream os;
1161  os << *prefix << "Done" << endl;
1162  std::cerr << os.str();
1163  }
1164  }
1165 
1166 
1167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1169  {
1170  if (! Kokkos::is_initialized ()) {
1171  std::ostringstream os;
1172  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1173  "Kokkos::finalize() has been called. This is user error! There are "
1174  "two likely causes: " << std::endl <<
1175  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1176  << std::endl <<
1177  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1178  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1179  "or Tpetra::finalize()." << std::endl << std::endl <<
1180  "Don't do either of these! Please refer to GitHib Issue #2372."
1181  << std::endl;
1182  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1183  this->getComm ().getRawPtr ());
1184  }
1185  else {
1186  using ::Tpetra::Details::mpiIsInitialized;
1187  using ::Tpetra::Details::mpiIsFinalized;
1188  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1189 
1190  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1191  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1192  mpiIsInitialized () && mpiIsFinalized ()) {
1193  // Tpetra itself does not require MPI, even if building with
1194  // MPI. It is legal to create Tpetra objects that do not use
1195  // MPI, even in an MPI program. However, calling Tpetra stuff
1196  // after MPI_Finalize() has been called is a bad idea, since
1197  // some Tpetra defaults may use MPI if available.
1198  std::ostringstream os;
1199  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1200  "MPI_Finalize() has been called. This is user error! There are "
1201  "two likely causes: " << std::endl <<
1202  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1203  << std::endl <<
1204  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1205  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1206  "Tpetra::finalize()." << std::endl << std::endl <<
1207  "Don't do either of these! Please refer to GitHib Issue #2372."
1208  << std::endl;
1209  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1210  }
1211  }
1212  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1213  // because Tpetra does not yet require Tpetra::initialize /
1214  // Tpetra::finalize.
1215  }
1216 
1217  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1218  bool
1220  {
1221  TEUCHOS_TEST_FOR_EXCEPTION(
1222  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1223  "getComm() returns null. Please report this bug to the Tpetra "
1224  "developers.");
1225 
1226  // This is a collective operation, if it hasn't been called before.
1227  setupDirectory ();
1228  return directory_->isOneToOne (*this);
1229  }
1230 
1231  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1232  LocalOrdinal
1234  getLocalElement (GlobalOrdinal globalIndex) const
1235  {
1236  if (isContiguous ()) {
1237  if (globalIndex < getMinGlobalIndex () ||
1238  globalIndex > getMaxGlobalIndex ()) {
1239  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1240  }
1241  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1242  }
1243  else if (globalIndex >= firstContiguousGID_ &&
1244  globalIndex <= lastContiguousGID_) {
1245  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1246  }
1247  else {
1248  // If the given global index is not in the table, this returns
1249  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1250  // glMapHost_ is Host and does not assume UVM
1251  lazyPushToHost();
1252  return glMapHost_.get (globalIndex);
1253  }
1254  }
1255 
1256  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1257  GlobalOrdinal
1259  getGlobalElement (LocalOrdinal localIndex) const
1260  {
1261  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1262  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1263  }
1264  if (isContiguous ()) {
1265  return getMinGlobalIndex () + localIndex;
1266  }
1267  else {
1268  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1269  // involvement. As a result, it is thread safe.
1270  //
1271  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1272  lazyPushToHost();
1273  return lgMapHost_[localIndex];
1274  }
1275  }
1276 
1277  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1278  bool
1280  isNodeLocalElement (LocalOrdinal localIndex) const
1281  {
1282  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1283  return false;
1284  } else {
1285  return true;
1286  }
1287  }
1288 
1289  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1290  bool
1292  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1293  return this->getLocalElement (globalIndex) !=
1294  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1295  }
1296 
1297  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1299  return uniform_;
1300  }
1301 
1302  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1304  return contiguous_;
1305  }
1306 
1307 
1308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1312  {
1313  return local_map_type (glMap_, lgMap_, getIndexBase (),
1314  getMinGlobalIndex (), getMaxGlobalIndex (),
1315  firstContiguousGID_, lastContiguousGID_,
1316  getLocalNumElements (), isContiguous ());
1317  }
1318 
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  bool
1323  {
1324  using Teuchos::outArg;
1325  using Teuchos::REDUCE_MIN;
1326  using Teuchos::reduceAll;
1327  //
1328  // Tests that avoid the Boolean all-reduce below by using
1329  // globally consistent quantities.
1330  //
1331  if (this == &map) {
1332  // Pointer equality on one process always implies pointer
1333  // equality on all processes, since Map is immutable.
1334  return true;
1335  }
1336  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1337  // The two communicators have different numbers of processes.
1338  // It's not correct to call isCompatible() in that case. This
1339  // may result in the all-reduce hanging below.
1340  return false;
1341  }
1342  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1343  // Two Maps are definitely NOT compatible if they have different
1344  // global numbers of indices.
1345  return false;
1346  }
1347  else if (isContiguous () && isUniform () &&
1348  map.isContiguous () && map.isUniform ()) {
1349  // Contiguous uniform Maps with the same number of processes in
1350  // their communicators, and with the same global numbers of
1351  // indices, are always compatible.
1352  return true;
1353  }
1354  else if (! isContiguous () && ! map.isContiguous () &&
1355  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1356  lgMap_.data () == map.lgMap_.data ()) {
1357  // Noncontiguous Maps whose global index lists are nonempty and
1358  // have the same pointer must be the same (and therefore
1359  // contiguous).
1360  //
1361  // Nonempty is important. For example, consider a communicator
1362  // with two processes, and two Maps that share this
1363  // communicator, with zero global indices on the first process,
1364  // and different nonzero numbers of global indices on the second
1365  // process. In that case, on the first process, the pointers
1366  // would both be NULL.
1367  return true;
1368  }
1369 
1370  TEUCHOS_TEST_FOR_EXCEPTION(
1371  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1372  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1373  "checked that this condition is true above, but it's false here. "
1374  "Please report this bug to the Tpetra developers.");
1375 
1376  // Do both Maps have the same number of indices on each process?
1377  const int locallyCompat =
1378  (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1379 
1380  int globallyCompat = 0;
1381  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1382  return (globallyCompat == 1);
1383  }
1384 
1385  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1386  bool
1389  {
1390  using Teuchos::ArrayView;
1391  using GO = global_ordinal_type;
1392  using size_type = typename ArrayView<const GO>::size_type;
1393 
1394  // If both Maps are contiguous, we can compare their GID ranges
1395  // easily by looking at the min and max GID on this process.
1396  // Otherwise, we'll compare their GID lists. If only one Map is
1397  // contiguous, then we only have to call getLocalElementList() on
1398  // the noncontiguous Map. (It's best to avoid calling it on a
1399  // contiguous Map, since it results in unnecessary storage that
1400  // persists for the lifetime of the Map.)
1401 
1402  if (this == &map) {
1403  // Pointer equality on one process always implies pointer
1404  // equality on all processes, since Map is immutable.
1405  return true;
1406  }
1407  else if (getLocalNumElements () != map.getLocalNumElements ()) {
1408  return false;
1409  }
1410  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1411  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1412  return false;
1413  }
1414  else {
1415  if (isContiguous ()) {
1416  if (map.isContiguous ()) {
1417  return true; // min and max match, so the ranges match.
1418  }
1419  else { // *this is contiguous, but map is not contiguous
1420  TEUCHOS_TEST_FOR_EXCEPTION(
1421  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1422  "Tpetra::Map::locallySameAs: BUG");
1423  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1424  const GO minLhsGid = this->getMinGlobalIndex ();
1425  const size_type numRhsElts = rhsElts.size ();
1426  for (size_type k = 0; k < numRhsElts; ++k) {
1427  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1428  if (curLhsGid != rhsElts[k]) {
1429  return false; // stop on first mismatch
1430  }
1431  }
1432  return true;
1433  }
1434  }
1435  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1436  TEUCHOS_TEST_FOR_EXCEPTION(
1437  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1438  "Tpetra::Map::locallySameAs: BUG");
1439  ArrayView<const GO> lhsElts = this->getLocalElementList ();
1440  const GO minRhsGid = map.getMinGlobalIndex ();
1441  const size_type numLhsElts = lhsElts.size ();
1442  for (size_type k = 0; k < numLhsElts; ++k) {
1443  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1444  if (curRhsGid != lhsElts[k]) {
1445  return false; // stop on first mismatch
1446  }
1447  }
1448  return true;
1449  }
1450  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1451  // Pointers to LID->GID "map" (actually just an array) are the
1452  // same, and the number of GIDs are the same.
1453  return this->getLocalNumElements () == map.getLocalNumElements ();
1454  }
1455  else { // we actually have to compare the GIDs
1456  if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1457  return false; // We already checked above, but check just in case
1458  }
1459  else {
1460  ArrayView<const GO> lhsElts = getLocalElementList ();
1461  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1462 
1463  // std::equal requires that the latter range is as large as
1464  // the former. We know the ranges have equal length, because
1465  // they have the same number of local entries.
1466  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1467  }
1468  }
1469  }
1470  }
1471 
1472  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1473  bool
1476  {
1477  if (this == &map)
1478  return true;
1479 
1480  // We are going to check if lmap1 is fitted into lmap2:
1481  // Is lmap1 (map) a subset of lmap2 (this)?
1482  // And do the first lmap1.getLocalNumElements() global elements
1483  // of lmap1,lmap2 owned on each process exactly match?
1484  auto lmap1 = map.getLocalMap();
1485  auto lmap2 = this->getLocalMap();
1486 
1487  auto numLocalElements1 = lmap1.getLocalNumElements();
1488  auto numLocalElements2 = lmap2.getLocalNumElements();
1489 
1490  if (numLocalElements1 > numLocalElements2) {
1491  // There are more indices in the first map on this process than in second map.
1492  return false;
1493  }
1494 
1495  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1496  // When both Maps are contiguous, just check the interval inclusion.
1497  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1498  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1499  }
1500 
1501  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1502  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1503  // The second map does not include the first map bounds, and thus some of
1504  // the first map global indices are not in the second map.
1505  return false;
1506  }
1507 
1508  using LO = local_ordinal_type;
1509  using range_type =
1510  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1511 
1512  // Check all elements.
1513  LO numDiff = 0;
1514  Kokkos::parallel_reduce(
1515  "isLocallyFitted",
1516  range_type(0, numLocalElements1),
1517  KOKKOS_LAMBDA (const LO i, LO& diff) {
1518  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1519  }, numDiff);
1520 
1521  return (numDiff == 0);
1522  }
1523 
1524  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1525  bool
1528  {
1529  using Teuchos::outArg;
1530  using Teuchos::REDUCE_MIN;
1531  using Teuchos::reduceAll;
1532  //
1533  // Tests that avoid the Boolean all-reduce below by using
1534  // globally consistent quantities.
1535  //
1536  if (this == &map) {
1537  // Pointer equality on one process always implies pointer
1538  // equality on all processes, since Map is immutable.
1539  return true;
1540  }
1541  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1542  // The two communicators have different numbers of processes.
1543  // It's not correct to call isSameAs() in that case. This
1544  // may result in the all-reduce hanging below.
1545  return false;
1546  }
1547  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1548  // Two Maps are definitely NOT the same if they have different
1549  // global numbers of indices.
1550  return false;
1551  }
1552  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1553  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1554  getIndexBase () != map.getIndexBase ()) {
1555  // If the global min or max global index doesn't match, or if
1556  // the index base doesn't match, then the Maps aren't the same.
1557  return false;
1558  }
1559  else if (isDistributed () != map.isDistributed ()) {
1560  // One Map is distributed and the other is not, which means that
1561  // the Maps aren't the same.
1562  return false;
1563  }
1564  else if (isContiguous () && isUniform () &&
1565  map.isContiguous () && map.isUniform ()) {
1566  // Contiguous uniform Maps with the same number of processes in
1567  // their communicators, with the same global numbers of indices,
1568  // and with matching index bases and ranges, must be the same.
1569  return true;
1570  }
1571 
1572  // The two communicators must have the same number of processes,
1573  // with process ranks occurring in the same order. This uses
1574  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1575  // "Operations that access communicators are local and their
1576  // execution does not require interprocess communication."
1577  // However, just to be sure, I'll put this call after the above
1578  // tests that don't communicate.
1579  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1580  return false;
1581  }
1582 
1583  // If we get this far, we need to check local properties and then
1584  // communicate local sameness across all processes.
1585  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1586 
1587  // Return true if and only if all processes report local sameness.
1588  int isSame_gbl = 0;
1589  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1590  return isSame_gbl == 1;
1591  }
1592 
1593  namespace { // (anonymous)
1594  template <class LO, class GO, class DT>
1595  class FillLgMap {
1596  public:
1597  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1598  const GO startGid) :
1599  lgMap_ (lgMap), startGid_ (startGid)
1600  {
1601  Kokkos::RangePolicy<LO, typename DT::execution_space>
1602  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1603  Kokkos::parallel_for (range, *this);
1604  }
1605 
1606  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1607  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1608  }
1609 
1610  private:
1611  const Kokkos::View<GO*, DT> lgMap_;
1612  const GO startGid_;
1613  };
1614 
1615  } // namespace (anonymous)
1616 
1617 
1618  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1619  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1621  {
1622  using std::endl;
1623  using LO = local_ordinal_type;
1624  using GO = global_ordinal_type;
1625  using const_lg_view_type = decltype(lgMap_);
1626  using lg_view_type = typename const_lg_view_type::non_const_type;
1627  const bool debug = Details::Behavior::debug("Map");
1628  const bool verbose = Details::Behavior::verbose("Map");
1629 
1630  std::unique_ptr<std::string> prefix;
1631  if (verbose) {
1632  prefix = Details::createPrefix(
1633  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1634  std::ostringstream os;
1635  os << *prefix << "Start" << endl;
1636  std::cerr << os.str();
1637  }
1638 
1639  // If the local-to-global mapping doesn't exist yet, and if we
1640  // have local entries, then create and fill the local-to-global
1641  // mapping.
1642  const bool needToCreateLocalToGlobalMapping =
1643  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1644 
1645  if (needToCreateLocalToGlobalMapping) {
1646  if (verbose) {
1647  std::ostringstream os;
1648  os << *prefix << "Need to create lgMap" << endl;
1649  std::cerr << os.str();
1650  }
1651  if (debug) {
1652  // The local-to-global mapping should have been set up already
1653  // for a noncontiguous map.
1654  TEUCHOS_TEST_FOR_EXCEPTION
1655  (! isContiguous(), std::logic_error,
1656  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1657  "mapping (lgMap_) should have been set up already for a "
1658  "noncontiguous Map. Please report this bug to the Tpetra "
1659  "developers.");
1660  }
1661  const LO numElts = static_cast<LO> (getLocalNumElements ());
1662 
1663  using Kokkos::view_alloc;
1664  using Kokkos::WithoutInitializing;
1665  lg_view_type lgMap ("lgMap", numElts);
1666  if (verbose) {
1667  std::ostringstream os;
1668  os << *prefix << "Fill lgMap" << endl;
1669  std::cerr << os.str();
1670  }
1671  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1672 
1673  if (verbose) {
1674  std::ostringstream os;
1675  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1676  std::cerr << os.str();
1677  }
1678 
1679  auto lgMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1680  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1681  auto exec_instance = execution_space();
1682  Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1683 
1684  // There's a non-trivial chance we'll grab this on the host,
1685  // so let's make sure the copy finishes
1686  exec_instance.fence();
1687 
1688  // "Commit" the local-to-global lookup table we filled in above.
1689  lgMap_ = lgMap;
1690  lgMapHost_ = lgMapHost;
1691  }
1692  else {
1693  lazyPushToHost();
1694  }
1695 
1696  if (verbose) {
1697  std::ostringstream os;
1698  os << *prefix << "Done" << endl;
1699  std::cerr << os.str();
1700  }
1701  return lgMapHost_;
1702  }
1703 
1704 
1705  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1706  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_device_type
1708  {
1709  using std::endl;
1710  using LO = local_ordinal_type;
1711  using GO = global_ordinal_type;
1712  using const_lg_view_type = decltype(lgMap_);
1713  using lg_view_type = typename const_lg_view_type::non_const_type;
1714  const bool debug = Details::Behavior::debug("Map");
1715  const bool verbose = Details::Behavior::verbose("Map");
1716 
1717  std::unique_ptr<std::string> prefix;
1718  if (verbose) {
1719  prefix = Details::createPrefix(
1720  comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1721  std::ostringstream os;
1722  os << *prefix << "Start" << endl;
1723  std::cerr << os.str();
1724  }
1725 
1726  // If the local-to-global mapping doesn't exist yet, and if we
1727  // have local entries, then create and fill the local-to-global
1728  // mapping.
1729  const bool needToCreateLocalToGlobalMapping =
1730  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1731 
1732  if (needToCreateLocalToGlobalMapping) {
1733  if (verbose) {
1734  std::ostringstream os;
1735  os << *prefix << "Need to create lgMap" << endl;
1736  std::cerr << os.str();
1737  }
1738  if (debug) {
1739  // The local-to-global mapping should have been set up already
1740  // for a noncontiguous map.
1741  TEUCHOS_TEST_FOR_EXCEPTION
1742  (! isContiguous(), std::logic_error,
1743  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1744  "mapping (lgMap_) should have been set up already for a "
1745  "noncontiguous Map. Please report this bug to the Tpetra "
1746  "developers.");
1747  }
1748  const LO numElts = static_cast<LO> (getLocalNumElements ());
1749 
1750  using Kokkos::view_alloc;
1751  using Kokkos::WithoutInitializing;
1752  lg_view_type lgMap ("lgMap", numElts);
1753  if (verbose) {
1754  std::ostringstream os;
1755  os << *prefix << "Fill lgMap" << endl;
1756  std::cerr << os.str();
1757  }
1758  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1759 
1760  // "Commit" the local-to-global lookup table we filled in above.
1761  lgMap_ = lgMap;
1762  }
1763 
1764  if (verbose) {
1765  std::ostringstream os;
1766  os << *prefix << "Done" << endl;
1767  std::cerr << os.str();
1768  }
1769  return lgMap_;
1770  }
1771 
1772 
1773 
1774  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1775  Teuchos::ArrayView<const GlobalOrdinal>
1777  {
1778  using GO = global_ordinal_type;
1779 
1780  // If the local-to-global mapping doesn't exist yet, and if we
1781  // have local entries, then create and fill the local-to-global
1782  // mapping.
1783  (void) this->getMyGlobalIndices ();
1784 
1785  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1786  lazyPushToHost();
1787  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1788  // The third argument forces ArrayView not to try to track memory
1789  // in a debug build. We have to use it because the memory does
1790  // not belong to a Teuchos memory management class.
1791  return Teuchos::ArrayView<const GO>(
1792  lgMapHostRawPtr,
1793  lgMapHost_.extent (0),
1794  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1795  }
1796 
1797  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1799  return distributed_;
1800  }
1801 
1802  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1804  using Teuchos::TypeNameTraits;
1805  std::ostringstream os;
1806 
1807  os << "Tpetra::Map: {"
1808  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1809  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1810  << ", NodeType: " << TypeNameTraits<Node>::name ();
1811  if (this->getObjectLabel () != "") {
1812  os << ", Label: \"" << this->getObjectLabel () << "\"";
1813  }
1814  os << ", Global number of entries: " << getGlobalNumElements ()
1815  << ", Number of processes: " << getComm ()->getSize ()
1816  << ", Uniform: " << (isUniform () ? "true" : "false")
1817  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1818  << ", Distributed: " << (isDistributed () ? "true" : "false")
1819  << "}";
1820  return os.str ();
1821  }
1822 
1827  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1828  std::string
1830  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1831  {
1832  using LO = local_ordinal_type;
1833  using std::endl;
1834 
1835  // This preserves current behavior of Map.
1836  if (vl < Teuchos::VERB_HIGH) {
1837  return std::string ();
1838  }
1839  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1840  Teuchos::RCP<Teuchos::FancyOStream> outp =
1841  Teuchos::getFancyOStream (outStringP);
1842  Teuchos::FancyOStream& out = *outp;
1843 
1844  auto comm = this->getComm ();
1845  const int myRank = comm->getRank ();
1846  const int numProcs = comm->getSize ();
1847  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1848  Teuchos::OSTab tab1 (out);
1849 
1850  const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1851  out << "My number of entries: " << numEnt << endl
1852  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1853  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1854 
1855  if (vl == Teuchos::VERB_EXTREME) {
1856  out << "My global indices: [";
1857  const LO minLclInd = this->getMinLocalIndex ();
1858  for (LO k = 0; k < numEnt; ++k) {
1859  out << minLclInd + this->getGlobalElement (k);
1860  if (k + 1 < numEnt) {
1861  out << ", ";
1862  }
1863  }
1864  out << "]" << endl;
1865  }
1866 
1867  out.flush (); // make sure the ostringstream got everything
1868  return outStringP->str ();
1869  }
1870 
1871  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1872  void
1874  describe (Teuchos::FancyOStream &out,
1875  const Teuchos::EVerbosityLevel verbLevel) const
1876  {
1877  using Teuchos::TypeNameTraits;
1878  using Teuchos::VERB_DEFAULT;
1879  using Teuchos::VERB_NONE;
1880  using Teuchos::VERB_LOW;
1881  using Teuchos::VERB_HIGH;
1882  using std::endl;
1883  using LO = local_ordinal_type;
1884  using GO = global_ordinal_type;
1885  const Teuchos::EVerbosityLevel vl =
1886  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1887 
1888  if (vl == VERB_NONE) {
1889  return; // don't print anything
1890  }
1891  // If this Map's Comm is null, then the Map does not participate
1892  // in collective operations with the other processes. In that
1893  // case, it is not even legal to call this method. The reasonable
1894  // thing to do in that case is nothing.
1895  auto comm = this->getComm ();
1896  if (comm.is_null ()) {
1897  return;
1898  }
1899  const int myRank = comm->getRank ();
1900  const int numProcs = comm->getSize ();
1901 
1902  // Only Process 0 should touch the output stream, but this method
1903  // in general may need to do communication. Thus, we may need to
1904  // preserve the current tab level across multiple "if (myRank ==
1905  // 0) { ... }" inner scopes. This is why we sometimes create
1906  // OSTab instances by pointer, instead of by value. We only need
1907  // to create them by pointer if the tab level must persist through
1908  // multiple inner scopes.
1909  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1910 
1911  if (myRank == 0) {
1912  // At every verbosity level but VERB_NONE, Process 0 prints.
1913  // By convention, describe() always begins with a tab before
1914  // printing.
1915  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1916  out << "\"Tpetra::Map\":" << endl;
1917  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1918  {
1919  out << "Template parameters:" << endl;
1920  Teuchos::OSTab tab2 (out);
1921  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1922  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1923  << "Node: " << TypeNameTraits<Node>::name () << endl;
1924  }
1925  const std::string label = this->getObjectLabel ();
1926  if (label != "") {
1927  out << "Label: \"" << label << "\"" << endl;
1928  }
1929  out << "Global number of entries: " << getGlobalNumElements () << endl
1930  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1931  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1932  << "Index base: " << getIndexBase () << endl
1933  << "Number of processes: " << numProcs << endl
1934  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1935  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1936  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1937  }
1938 
1939  // This is collective over the Map's communicator.
1940  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1941  const std::string lclStr = this->localDescribeToString (vl);
1942  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1943  }
1944  }
1945 
1946  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1947  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1949  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1950  {
1951  using Teuchos::RCP;
1952  using Teuchos::rcp;
1953  using GST = global_size_t;
1954  using LO = local_ordinal_type;
1955  using GO = global_ordinal_type;
1956  using map_type = Map<LO, GO, Node>;
1957 
1958  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1959  // the Map by calling its ordinary public constructor, using the
1960  // original Map's data. This only involves O(1) all-reduces over
1961  // the new communicator, which in the common case only includes a
1962  // small number of processes.
1963 
1964  // Create the Map to return.
1965  if (newComm.is_null () || newComm->getSize () < 1) {
1966  return Teuchos::null; // my process does not participate in the new Map
1967  }
1968  else if (newComm->getSize () == 1) {
1969  lazyPushToHost();
1970 
1971  // The case where the new communicator has only one process is
1972  // easy. We don't have to communicate to get all the
1973  // information we need. Use the default comm to create the new
1974  // Map, then fill in all the fields directly.
1975  RCP<map_type> newMap (new map_type ());
1976 
1977  newMap->comm_ = newComm;
1978  // mfh 07 Oct 2016: Preserve original behavior, even though the
1979  // original index base may no longer be the globally min global
1980  // index. See #616 for why this doesn't matter so much anymore.
1981  newMap->indexBase_ = this->indexBase_;
1982  newMap->numGlobalElements_ = this->numLocalElements_;
1983  newMap->numLocalElements_ = this->numLocalElements_;
1984  newMap->minMyGID_ = this->minMyGID_;
1985  newMap->maxMyGID_ = this->maxMyGID_;
1986  newMap->minAllGID_ = this->minMyGID_;
1987  newMap->maxAllGID_ = this->maxMyGID_;
1988  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1989  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1990  // Since the new communicator has only one process, neither
1991  // uniformity nor contiguity have changed.
1992  newMap->uniform_ = this->uniform_;
1993  newMap->contiguous_ = this->contiguous_;
1994  // The new communicator only has one process, so the new Map is
1995  // not distributed.
1996  newMap->distributed_ = false;
1997  newMap->lgMap_ = this->lgMap_;
1998  newMap->lgMapHost_ = this->lgMapHost_;
1999  newMap->glMap_ = this->glMap_;
2000  newMap->glMapHost_ = this->glMapHost_;
2001  // It's OK not to initialize the new Map's Directory.
2002  // This is initialized lazily, on first call to getRemoteIndexList.
2003 
2004  return newMap;
2005  }
2006  else { // newComm->getSize() != 1
2007  // Even if the original Map is contiguous, the new Map might not
2008  // be, especially if the excluded processes have ranks != 0 or
2009  // newComm->getSize()-1. The common case for this method is to
2010  // exclude many (possibly even all but one) processes, so it
2011  // likely doesn't pay to do the global communication (over the
2012  // original communicator) to figure out whether we can optimize
2013  // the result Map. Thus, we just set up the result Map as
2014  // noncontiguous.
2015  //
2016  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
2017  // the global-to-local table, etc. Optimize this code path to
2018  // avoid unnecessary local work.
2019 
2020  // Make Map (re)compute the global number of elements.
2021  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2022  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
2023  // to use the noncontiguous Map constructor, since the new Map
2024  // might not be contiguous. Even if the old Map was contiguous,
2025  // some process in the "middle" might have been excluded. If we
2026  // want to avoid local work, we either have to do the setup by
2027  // hand, or write a new Map constructor.
2028 #if 1
2029  // The disabled code here throws the following exception in
2030  // Map's replaceCommWithSubset test:
2031  //
2032  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
2033  // 10:
2034  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2035  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2036 
2037  auto lgMap = this->getMyGlobalIndices ();
2038  using size_type =
2039  typename std::decay<decltype (lgMap.extent (0)) >::type;
2040  const size_type lclNumInds =
2041  static_cast<size_type> (this->getLocalNumElements ());
2042  using Teuchos::TypeNameTraits;
2043  TEUCHOS_TEST_FOR_EXCEPTION
2044  (lgMap.extent (0) != lclNumInds, std::logic_error,
2045  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2046  "has length " << lgMap.extent (0) << " (of type " <<
2047  TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2048  " = " << this->getLocalNumElements () << ". The latter, upon being "
2049  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2050  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2051  "developers.");
2052 #else
2053  Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2054 #endif // 1
2055 
2056  const GO indexBase = this->getIndexBase ();
2057  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2058  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2059  return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2060  }
2061  }
2062 
2063  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2064  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2067  {
2068  using Teuchos::Comm;
2069  using Teuchos::null;
2070  using Teuchos::outArg;
2071  using Teuchos::RCP;
2072  using Teuchos::rcp;
2073  using Teuchos::REDUCE_MIN;
2074  using Teuchos::reduceAll;
2075 
2076  // Create the new communicator. split() returns a valid
2077  // communicator on all processes. On processes where color == 0,
2078  // ignore the result. Passing key == 0 tells MPI to order the
2079  // processes in the new communicator by their rank in the old
2080  // communicator.
2081  const int color = (numLocalElements_ == 0) ? 0 : 1;
2082  // MPI_Comm_split must be called collectively over the original
2083  // communicator. We can't just call it on processes with color
2084  // one, even though we will ignore its result on processes with
2085  // color zero.
2086  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2087  if (color == 0) {
2088  newComm = null;
2089  }
2090 
2091  // Create the Map to return.
2092  if (newComm.is_null ()) {
2093  return null; // my process does not participate in the new Map
2094  } else {
2095  RCP<Map> map = rcp (new Map ());
2096 
2097  map->comm_ = newComm;
2098  map->indexBase_ = indexBase_;
2099  map->numGlobalElements_ = numGlobalElements_;
2100  map->numLocalElements_ = numLocalElements_;
2101  map->minMyGID_ = minMyGID_;
2102  map->maxMyGID_ = maxMyGID_;
2103  map->minAllGID_ = minAllGID_;
2104  map->maxAllGID_ = maxAllGID_;
2105  map->firstContiguousGID_= firstContiguousGID_;
2106  map->lastContiguousGID_ = lastContiguousGID_;
2107 
2108  // Uniformity and contiguity have not changed. The directory
2109  // has changed, but we've taken care of that above.
2110  map->uniform_ = uniform_;
2111  map->contiguous_ = contiguous_;
2112 
2113  // If the original Map was NOT distributed, then the new Map
2114  // cannot be distributed.
2115  //
2116  // If the number of processes in the new communicator is 1, then
2117  // the new Map is not distributed.
2118  //
2119  // Otherwise, we have to check the new Map using an all-reduce
2120  // (over the new communicator). For example, the original Map
2121  // may have had some processes with zero elements, and all other
2122  // processes with the same number of elements as in the whole
2123  // Map. That Map is technically distributed, because of the
2124  // processes with zero elements. Removing those processes would
2125  // make the new Map locally replicated.
2126  if (! distributed_ || newComm->getSize () == 1) {
2127  map->distributed_ = false;
2128  } else {
2129  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2130  int allProcsOwnAllGids = 0;
2131  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2132  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2133  }
2134 
2135  map->lgMap_ = lgMap_;
2136  map->lgMapHost_ = lgMapHost_;
2137  map->glMap_ = glMap_;
2138  map->glMapHost_ = glMapHost_;
2139 
2140  // Map's default constructor creates an uninitialized Directory.
2141  // The Directory will be initialized on demand in
2142  // getRemoteIndexList().
2143  //
2144  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2145  // directory more efficiently than just recreating it. If
2146  // directory recreation proves a bottleneck, we can always
2147  // revisit this. On the other hand, Directory creation is only
2148  // collective over the new, presumably much smaller
2149  // communicator, so it may not be worth the effort to optimize.
2150 
2151  return map;
2152  }
2153  }
2154 
2155  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2156  void
2158  {
2159  TEUCHOS_TEST_FOR_EXCEPTION(
2160  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2161  "The Directory is null. "
2162  "Please report this bug to the Tpetra developers.");
2163 
2164  // Only create the Directory if it hasn't been created yet.
2165  // This is a collective operation.
2166  if (! directory_->initialized ()) {
2167  directory_->initialize (*this);
2168  }
2169  }
2170 
2171  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2172  LookupStatus
2174  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2175  const Teuchos::ArrayView<int>& PIDs,
2176  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2177  {
2178  using Tpetra::Details::OrdinalTraits;
2180  using std::endl;
2181  using size_type = Teuchos::ArrayView<int>::size_type;
2182 
2183  const bool verbose = Details::Behavior::verbose("Map");
2184  const size_t maxNumToPrint = verbose ?
2186  std::unique_ptr<std::string> prefix;
2187  if (verbose) {
2188  prefix = Details::createPrefix(comm_.getRawPtr(),
2189  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2190  std::ostringstream os;
2191  os << *prefix << "Start: ";
2192  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2193  os << endl;
2194  std::cerr << os.str();
2195  }
2196 
2197  // Empty Maps (i.e., containing no indices on any processes in the
2198  // Map's communicator) are perfectly valid. In that case, if the
2199  // input GID list is nonempty, we fill the output arrays with
2200  // invalid values, and return IDNotPresent to notify the caller.
2201  // It's perfectly valid to give getRemoteIndexList GIDs that the
2202  // Map doesn't own. SubmapImport test 2 needs this functionality.
2203  if (getGlobalNumElements () == 0) {
2204  if (GIDs.size () == 0) {
2205  if (verbose) {
2206  std::ostringstream os;
2207  os << *prefix << "Done; both Map & input are empty" << endl;
2208  std::cerr << os.str();
2209  }
2210  return AllIDsPresent; // trivially
2211  }
2212  else {
2213  if (verbose) {
2214  std::ostringstream os;
2215  os << *prefix << "Done: Map is empty on all processes, "
2216  "so all output PIDs & LIDs are invalid (-1)." << endl;
2217  std::cerr << os.str();
2218  }
2219  for (size_type k = 0; k < PIDs.size (); ++k) {
2220  PIDs[k] = OrdinalTraits<int>::invalid ();
2221  }
2222  for (size_type k = 0; k < LIDs.size (); ++k) {
2223  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2224  }
2225  return IDNotPresent;
2226  }
2227  }
2228 
2229  // getRemoteIndexList must be called collectively, and Directory
2230  // initialization is collective too, so it's OK to initialize the
2231  // Directory on demand.
2232 
2233  if (verbose) {
2234  std::ostringstream os;
2235  os << *prefix << "Call setupDirectory" << endl;
2236  std::cerr << os.str();
2237  }
2238  setupDirectory();
2239  if (verbose) {
2240  std::ostringstream os;
2241  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2242  std::cerr << os.str();
2243  }
2244  const Tpetra::LookupStatus retVal =
2245  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2246  if (verbose) {
2247  std::ostringstream os;
2248  os << *prefix << "Done; getDirectoryEntries returned "
2249  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2250  << "; ";
2251  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2252  os << ", ";
2253  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2254  os << endl;
2255  std::cerr << os.str();
2256  }
2257  return retVal;
2258  }
2259 
2260  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2261  LookupStatus
2263  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2264  const Teuchos::ArrayView<int> & PIDs) const
2265  {
2267  using std::endl;
2268 
2269  const bool verbose = Details::Behavior::verbose("Map");
2270  const size_t maxNumToPrint = verbose ?
2272  std::unique_ptr<std::string> prefix;
2273  if (verbose) {
2274  prefix = Details::createPrefix(comm_.getRawPtr(),
2275  "Map", "getRemoteIndexList(GIDs,PIDs)");
2276  std::ostringstream os;
2277  os << *prefix << "Start: ";
2278  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2279  os << endl;
2280  std::cerr << os.str();
2281  }
2282 
2283  if (getGlobalNumElements () == 0) {
2284  if (GIDs.size () == 0) {
2285  if (verbose) {
2286  std::ostringstream os;
2287  os << *prefix << "Done; both Map & input are empty" << endl;
2288  std::cerr << os.str();
2289  }
2290  return AllIDsPresent; // trivially
2291  }
2292  else {
2293  if (verbose) {
2294  std::ostringstream os;
2295  os << *prefix << "Done: Map is empty on all processes, "
2296  "so all output PIDs are invalid (-1)." << endl;
2297  std::cerr << os.str();
2298  }
2299  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2300  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2301  }
2302  return IDNotPresent;
2303  }
2304  }
2305 
2306  // getRemoteIndexList must be called collectively, and Directory
2307  // initialization is collective too, so it's OK to initialize the
2308  // Directory on demand.
2309 
2310  if (verbose) {
2311  std::ostringstream os;
2312  os << *prefix << "Call setupDirectory" << endl;
2313  std::cerr << os.str();
2314  }
2315  setupDirectory();
2316  if (verbose) {
2317  std::ostringstream os;
2318  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2319  std::cerr << os.str();
2320  }
2321  const Tpetra::LookupStatus retVal =
2322  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2323  if (verbose) {
2324  std::ostringstream os;
2325  os << *prefix << "Done; getDirectoryEntries returned "
2326  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2327  << "; ";
2328  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2329  os << endl;
2330  std::cerr << os.str();
2331  }
2332  return retVal;
2333  }
2334 
2335  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2336  void
2338  using exec_space = typename Node::device_type::execution_space;
2339  if(lgMap_.extent(0) != lgMapHost_.extent(0)) {
2340  Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2341  // NOTE: We check lgMap_ and not glMap_, since the latter can
2342  // be somewhat error prone for contiguous maps
2343 
2344  // create_mirror_view preserves const-ness. create_mirror does not
2345  auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace (), lgMap_);
2346 
2347  // Since this was computed on the default stream, we can copy on the stream and then fence
2348  // the stream
2349  Kokkos::deep_copy(exec_space(),lgMap_host,lgMap_);
2350  exec_space().fence();
2351  lgMapHost_ = lgMap_host;
2352 
2353  // Make host version - when memory spaces match these just do trivial assignment
2354  glMapHost_ = global_to_local_table_host_type(glMap_);
2355  }
2356  }
2357 
2358 
2359  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2360  Teuchos::RCP<const Teuchos::Comm<int> >
2362  return comm_;
2363  }
2364 
2365  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2366  bool
2368  checkIsDist() const
2369  {
2370  using Teuchos::as;
2371  using Teuchos::outArg;
2372  using Teuchos::REDUCE_MIN;
2373  using Teuchos::reduceAll;
2374  using std::endl;
2375 
2376  const bool verbose = Details::Behavior::verbose("Map");
2377  std::unique_ptr<std::string> prefix;
2378  if (verbose) {
2379  prefix = Details::createPrefix(
2380  comm_.getRawPtr(), "Map", "checkIsDist");
2381  std::ostringstream os;
2382  os << *prefix << "Start" << endl;
2383  std::cerr << os.str();
2384  }
2385 
2386  bool global = false;
2387  if (comm_->getSize () > 1) {
2388  // The communicator has more than one process, but that doesn't
2389  // necessarily mean the Map is distributed.
2390  int localRep = 0;
2391  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2392  // The number of local elements on this process equals the
2393  // number of global elements.
2394  //
2395  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2396  // duplicates in the global ID list on input (the third Map
2397  // constructor), so that the number of local elements (which
2398  // are not duplicated) on this process could be less than the
2399  // number of global elements, even if this process owns all
2400  // the elements?
2401  localRep = 1;
2402  }
2403  int allLocalRep;
2404  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2405  if (allLocalRep != 1) {
2406  // At least one process does not own all the elements.
2407  // This makes the Map a distributed Map.
2408  global = true;
2409  }
2410  }
2411  // If the communicator has only one process, then the Map is not
2412  // distributed.
2413 
2414  if (verbose) {
2415  std::ostringstream os;
2416  os << *prefix << "Done; global=" << (global ? "true" : "false")
2417  << endl;
2418  std::cerr << os.str();
2419  }
2420  return global;
2421  }
2422 
2423 } // namespace Tpetra
2424 
2425 template <class LocalOrdinal, class GlobalOrdinal>
2426 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2427 Tpetra::createLocalMap (const size_t numElements,
2428  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2429 {
2430  typedef LocalOrdinal LO;
2431  typedef GlobalOrdinal GO;
2432  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2433  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2434 }
2435 
2436 template <class LocalOrdinal, class GlobalOrdinal>
2437 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2438 Tpetra::createUniformContigMap (const global_size_t numElements,
2439  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2440 {
2441  typedef LocalOrdinal LO;
2442  typedef GlobalOrdinal GO;
2443  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2444  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2445 }
2446 
2447 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2448 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2449 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2450  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2451 )
2452 {
2453  using Teuchos::rcp;
2455  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2456 
2457  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2458 }
2459 
2460 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2461 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2462 Tpetra::createLocalMapWithNode (const size_t numElements,
2463  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2464 )
2465 {
2466  using Tpetra::global_size_t;
2467  using Teuchos::rcp;
2469  const GlobalOrdinal indexBase = 0;
2470  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2471 
2472  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2473 }
2474 
2475 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2476 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2478  const size_t localNumElements,
2479  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2480 )
2481 {
2482  using Teuchos::rcp;
2484  const GlobalOrdinal indexBase = 0;
2485 
2486  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2487 }
2488 
2489 template <class LocalOrdinal, class GlobalOrdinal>
2490 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2492  const size_t localNumElements,
2493  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2494 {
2495  typedef LocalOrdinal LO;
2496  typedef GlobalOrdinal GO;
2497  using NT = typename Tpetra::Map<LO, GO>::node_type;
2498 
2499  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2500 }
2501 
2502 template <class LocalOrdinal, class GlobalOrdinal>
2503 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2504 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2505  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2506 {
2507  typedef LocalOrdinal LO;
2508  typedef GlobalOrdinal GO;
2509  using NT = typename Tpetra::Map<LO, GO>::node_type;
2510 
2511  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2512 }
2513 
2514 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2515 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2516 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2517  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2518 )
2519 {
2520  using Teuchos::rcp;
2522  using GST = Tpetra::global_size_t;
2523  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2524  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2525  // shouldn't be zero, given that the index base is supposed to equal
2526  // the globally min global index?
2527  const GlobalOrdinal indexBase = 0;
2528 
2529  return rcp (new map_type (INV, elementList, indexBase, comm));
2530 }
2531 
2532 template<class LO, class GO, class NT>
2533 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2534 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2535 {
2537  using Teuchos::Array;
2538  using Teuchos::ArrayView;
2539  using Teuchos::as;
2540  using Teuchos::rcp;
2541  using std::cerr;
2542  using std::endl;
2543  using map_type = Tpetra::Map<LO, GO, NT>;
2544  using GST = global_size_t;
2545 
2546  const bool verbose = Details::Behavior::verbose("Map");
2547  std::unique_ptr<std::string> prefix;
2548  if (verbose) {
2549  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2550  prefix = Details::createPrefix(
2551  comm.getRawPtr(), "createOneToOne(Map)");
2552  std::ostringstream os;
2553  os << *prefix << "Start" << endl;
2554  cerr << os.str();
2555  }
2556  const size_t maxNumToPrint = verbose ?
2557  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2558  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2559  const int myRank = M->getComm ()->getRank ();
2560 
2561  // Bypasses for special cases where either M is known to be
2562  // one-to-one, or the one-to-one version of M is easy to compute.
2563  // This is why we take M as an RCP, not as a const reference -- so
2564  // that we can return M itself if it is 1-to-1.
2565  if (! M->isDistributed ()) {
2566  // For a locally replicated Map, we assume that users want to push
2567  // all the GIDs to Process 0.
2568 
2569  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2570  // you think it should return, in this special case of a locally
2571  // replicated contiguous Map.
2572  const GST numGlobalEntries = M->getGlobalNumElements ();
2573  if (M->isContiguous()) {
2574  const size_t numLocalEntries =
2575  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2576  if (verbose) {
2577  std::ostringstream os;
2578  os << *prefix << "Input is locally replicated & contiguous; "
2579  "numLocalEntries=" << numLocalEntries << endl;
2580  cerr << os.str ();
2581  }
2582  auto retMap =
2583  rcp(new map_type(numGlobalEntries, numLocalEntries,
2584  M->getIndexBase(), M->getComm()));
2585  if (verbose) {
2586  std::ostringstream os;
2587  os << *prefix << "Done" << endl;
2588  cerr << os.str ();
2589  }
2590  return retMap;
2591  }
2592  else {
2593  if (verbose) {
2594  std::ostringstream os;
2595  os << *prefix << "Input is locally replicated & noncontiguous"
2596  << endl;
2597  cerr << os.str ();
2598  }
2599  ArrayView<const GO> myGids =
2600  (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2601  auto retMap =
2602  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2603  M->getComm()));
2604  if (verbose) {
2605  std::ostringstream os;
2606  os << *prefix << "Done" << endl;
2607  cerr << os.str ();
2608  }
2609  return retMap;
2610  }
2611  }
2612  else if (M->isContiguous ()) {
2613  if (verbose) {
2614  std::ostringstream os;
2615  os << *prefix << "Input is distributed & contiguous" << endl;
2616  cerr << os.str ();
2617  }
2618  // Contiguous, distributed Maps are one-to-one by construction.
2619  // (Locally replicated Maps can be contiguous.)
2620  return M;
2621  }
2622  else {
2623  if (verbose) {
2624  std::ostringstream os;
2625  os << *prefix << "Input is distributed & noncontiguous" << endl;
2626  cerr << os.str ();
2627  }
2629  const size_t numMyElems = M->getLocalNumElements ();
2630  ArrayView<const GO> myElems = M->getLocalElementList ();
2631  Array<int> owner_procs_vec (numMyElems);
2632 
2633  if (verbose) {
2634  std::ostringstream os;
2635  os << *prefix << "Call Directory::getDirectoryEntries: ";
2636  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2637  os << endl;
2638  cerr << os.str();
2639  }
2640  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2641  if (verbose) {
2642  std::ostringstream os;
2643  os << *prefix << "getDirectoryEntries result: ";
2644  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2645  os << endl;
2646  cerr << os.str();
2647  }
2648 
2649  Array<GO> myOwned_vec (numMyElems);
2650  size_t numMyOwnedElems = 0;
2651  for (size_t i = 0; i < numMyElems; ++i) {
2652  const GO GID = myElems[i];
2653  const int owner = owner_procs_vec[i];
2654 
2655  if (myRank == owner) {
2656  myOwned_vec[numMyOwnedElems++] = GID;
2657  }
2658  }
2659  myOwned_vec.resize (numMyOwnedElems);
2660 
2661  if (verbose) {
2662  std::ostringstream os;
2663  os << *prefix << "Create Map: ";
2664  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2665  os << endl;
2666  cerr << os.str();
2667  }
2668  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2669  M->getIndexBase(), M->getComm()));
2670  if (verbose) {
2671  std::ostringstream os;
2672  os << *prefix << "Done" << endl;
2673  cerr << os.str();
2674  }
2675  return retMap;
2676  }
2677 }
2678 
2679 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2680 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2683 {
2684  using Details::Behavior;
2686  using Teuchos::Array;
2687  using Teuchos::ArrayView;
2688  using Teuchos::RCP;
2689  using Teuchos::rcp;
2690  using Teuchos::toString;
2691  using std::cerr;
2692  using std::endl;
2693  using LO = LocalOrdinal;
2694  using GO = GlobalOrdinal;
2695  using map_type = Tpetra::Map<LO, GO, Node>;
2696 
2697  const bool verbose = Behavior::verbose("Map");
2698  std::unique_ptr<std::string> prefix;
2699  if (verbose) {
2700  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2701  prefix = Details::createPrefix(
2702  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2703  std::ostringstream os;
2704  os << *prefix << "Start" << endl;
2705  cerr << os.str();
2706  }
2707  const size_t maxNumToPrint = verbose ?
2708  Behavior::verbosePrintCountThreshold() : size_t(0);
2709 
2710  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2711  // Maps (which are 1-to-1 by construction).
2712 
2714  if (verbose) {
2715  std::ostringstream os;
2716  os << *prefix << "Initialize Directory" << endl;
2717  cerr << os.str ();
2718  }
2719  directory.initialize (*M, tie_break);
2720  if (verbose) {
2721  std::ostringstream os;
2722  os << *prefix << "Done initializing Directory" << endl;
2723  cerr << os.str ();
2724  }
2725  size_t numMyElems = M->getLocalNumElements ();
2726  ArrayView<const GO> myElems = M->getLocalElementList ();
2727  Array<int> owner_procs_vec (numMyElems);
2728  if (verbose) {
2729  std::ostringstream os;
2730  os << *prefix << "Call Directory::getDirectoryEntries: ";
2731  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2732  os << endl;
2733  cerr << os.str();
2734  }
2735  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2736  if (verbose) {
2737  std::ostringstream os;
2738  os << *prefix << "getDirectoryEntries result: ";
2739  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2740  os << endl;
2741  cerr << os.str();
2742  }
2743 
2744  const int myRank = M->getComm()->getRank();
2745  Array<GO> myOwned_vec (numMyElems);
2746  size_t numMyOwnedElems = 0;
2747  for (size_t i = 0; i < numMyElems; ++i) {
2748  const GO GID = myElems[i];
2749  const int owner = owner_procs_vec[i];
2750  if (myRank == owner) {
2751  myOwned_vec[numMyOwnedElems++] = GID;
2752  }
2753  }
2754  myOwned_vec.resize (numMyOwnedElems);
2755 
2756  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2757  // valid for the new Map. Why can't we reuse it?
2758  const global_size_t GINV =
2759  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2760  if (verbose) {
2761  std::ostringstream os;
2762  os << *prefix << "Create Map: ";
2763  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2764  os << endl;
2765  cerr << os.str();
2766  }
2767  RCP<const map_type> retMap
2768  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2769  M->getComm ()));
2770  if (verbose) {
2771  std::ostringstream os;
2772  os << *prefix << "Done" << endl;
2773  cerr << os.str();
2774  }
2775  return retMap;
2776 }
2777 
2778 //
2779 // Explicit instantiation macro
2780 //
2781 // Must be expanded from within the Tpetra namespace!
2782 //
2783 
2785 
2786 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2787  \
2788  template class Map< LO , GO , NODE >; \
2789  \
2790  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2791  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2792  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2793  \
2794  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2795  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2796  const size_t localNumElements, \
2797  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2798  \
2799  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2800  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2801  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2802  \
2803  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2804  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2805  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2806  \
2807  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2808  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2809  \
2810  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2811  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2812  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2813 
2814 
2816 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2817  template Teuchos::RCP< const Map<LO,GO> > \
2818  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2819  \
2820  template Teuchos::RCP< const Map<LO,GO> > \
2821  createContigMap<LO,GO>( global_size_t, size_t, \
2822  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2823  \
2824  template Teuchos::RCP< const Map<LO,GO> > \
2825  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2826  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2827  \
2828  template Teuchos::RCP< const Map<LO,GO> > \
2829  createUniformContigMap<LO,GO>( const global_size_t, \
2830  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2831 
2832 #endif // TPETRA_MAP_DEF_HPP
Interface for breaking ties in ownership.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:34
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
LocalOrdinal local_ordinal_type
The type of local indices.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
GlobalOrdinal global_ordinal_type
The type of global indices.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
Declaration of Tpetra::Details::printOnce.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declaration of a function that prints strings from each process.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map&#39;s device.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map&#39;s communicator with a subset communicator.
Functions for initializing and finalizing Tpetra.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
static bool verbose()
Whether Tpetra is in verbose mode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
typename device_type::execution_space execution_space
The Kokkos execution space.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
A parallel distribution of indices over processes.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
bool isOneToOne() const
Whether the Map is one to one.
void initialize(const map_type &map)
Initialize the Directory with its Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
typename Node::device_type device_type
This class&#39; Kokkos::Device specialization.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
LocalGlobal
Enum for local versus global allocation of Map entries.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
Node node_type
Legacy typedef that will go away at some point.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.
std::string description() const
Implementation of Teuchos::Describable.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.
Map()
Default constructor (that does nothing).