Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
44 
45 #ifndef TPETRA_MAP_DEF_HPP
46 #define TPETRA_MAP_DEF_HPP
47 
48 #include <memory>
49 #include <sstream>
50 #include <stdexcept>
51 #include <typeinfo>
52 
53 #include "Teuchos_as.hpp"
54 #include "Teuchos_TypeNameTraits.hpp"
55 #include "Teuchos_CommHelpers.hpp"
56 
57 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
59 #include "Tpetra_Details_FixedHashTable.hpp"
62 #include "Tpetra_Core.hpp"
63 #include "Tpetra_Util.hpp"
64 #include "Tpetra_Details_mpiIsInitialized.hpp"
65 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
68 
69 namespace { // (anonymous)
70 
71  void
72  checkMapInputArray (const char ctorName[],
73  const void* indexList,
74  const size_t indexListSize,
75  const Teuchos::Comm<int>* const comm)
76  {
78 
79  const bool debug = Behavior::debug("Map");
80  if (debug) {
81  using Teuchos::outArg;
82  using Teuchos::REDUCE_MIN;
83  using Teuchos::reduceAll;
84  using std::endl;
85 
86  const int myRank = comm == nullptr ? 0 : comm->getRank ();
87  const bool verbose = Behavior::verbose("Map");
88  std::ostringstream lclErrStrm;
89  int lclSuccess = 1;
90 
91  if (indexListSize != 0 && indexList == nullptr) {
92  lclSuccess = 0;
93  if (verbose) {
94  lclErrStrm << "Proc " << myRank << ": indexList is null, "
95  "but indexListSize=" << indexListSize << " != 0." << endl;
96  }
97  }
98  int gblSuccess = 0; // output argument
99  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
100  if (gblSuccess != 1) {
101  std::ostringstream gblErrStrm;
102  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
103  " detected a problem with the input array "
104  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
105  "of global indices." << endl;
106  if (verbose) {
107  using ::Tpetra::Details::gathervPrint;
108  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
109  }
110  TEUCHOS_TEST_FOR_EXCEPTION
111  (true, std::invalid_argument, gblErrStrm.str ());
112  }
113  }
114  }
115 
116 
117 
118 
119  template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
120  void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal & minMyGID, GlobalOrdinal & maxMyGID, GlobalOrdinal &firstContiguousGID, GlobalOrdinal &lastContiguousGID_val, LocalOrdinal &lastContiguousGID_loc) {
121  using LO = LocalOrdinal;
122  using GO = GlobalOrdinal;
123  using exec_space = typename ViewType::device_type::execution_space;
124  using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO> >;
125  const LO numLocalElements = entryList.extent(0);
126 
127  // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
128  // ride, rather than the other way around
129  typedef typename Kokkos::MinLoc<LO,GO>::value_type minloc_type;
130  minloc_type myMinLoc;
131 
132  // Find the initial sequence of parallel gids
133  // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
134  // We want the one *before* that guy.
135  Kokkos::parallel_reduce(range_policy(0,numLocalElements),KOKKOS_LAMBDA(const LO & i, GO &l_myMin, GO&l_myMax, GO& l_firstCont, minloc_type & l_lastCont){
136  GO entry_0 = entryList[0];
137  GO entry_i = entryList[i];
138 
139  // Easy stuff
140  l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
141  l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
142  l_firstCont = entry_0;
143 
144  if(entry_i - entry_0 != i && l_lastCont.val >= i) {
145  // We're non-contiguous, so the guy before us could be the last contiguous guy
146  l_lastCont.val = i-1;
147  l_lastCont.loc = entryList[i-1];
148  }
149  else if (i == numLocalElements-1 && i < l_lastCont.val) {
150  // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
151  l_lastCont.val = i;
152  l_lastCont.loc = entry_i;
153  }
154 
155  },Kokkos::Min<GO>(minMyGID),Kokkos::Max<GO>(maxMyGID),Kokkos::Min<GO>(firstContiguousGID),Kokkos::MinLoc<LO,GO>(myMinLoc));
156 
157  // This switch is intentional, since we're using MinLoc backwards
158  lastContiguousGID_val = myMinLoc.loc;
159  lastContiguousGID_loc = myMinLoc.val;
160  }
161 
162 
163 } // namespace (anonymous)
164 
165 namespace Tpetra {
166 
167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
169  Map () :
170  comm_ (new Teuchos::SerialComm<int> ()),
171  indexBase_ (0),
172  numGlobalElements_ (0),
173  numLocalElements_ (0),
174  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
175  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
176  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
177  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
178  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
179  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
180  uniform_ (false), // trivially
181  contiguous_ (false),
182  distributed_ (false), // no communicator yet
183  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
184  {
187  }
188 
189 
190  template <class LocalOrdinal, class GlobalOrdinal, class Node>
192  Map (const global_size_t numGlobalElements,
193  const global_ordinal_type indexBase,
194  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
195  const LocalGlobal lOrG) :
196  comm_ (comm),
197  uniform_ (true),
198  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
199  {
200  using Teuchos::as;
201  using Teuchos::broadcast;
202  using Teuchos::outArg;
203  using Teuchos::reduceAll;
204  using Teuchos::REDUCE_MIN;
205  using Teuchos::REDUCE_MAX;
206  using Teuchos::typeName;
207  using std::endl;
208  using GO = global_ordinal_type;
209  using GST = global_size_t;
210  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
211  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
212  const char exPfx[] =
213  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
214 
215  const bool debug = Details::Behavior::debug("Map");
216  const bool verbose = Details::Behavior::verbose("Map");
217  std::unique_ptr<std::string> prefix;
218  if (verbose) {
219  prefix = Details::createPrefix(
220  comm_.getRawPtr(), "Map", funcName);
221  std::ostringstream os;
222  os << *prefix << "Start" << endl;
223  std::cerr << os.str();
224  }
227 
228  // In debug mode only, check whether numGlobalElements and
229  // indexBase are the same over all processes in the communicator.
230  if (debug) {
231  GST proc0NumGlobalElements = numGlobalElements;
232  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
233  GST minNumGlobalElements = numGlobalElements;
234  GST maxNumGlobalElements = numGlobalElements;
235  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
236  outArg(minNumGlobalElements));
237  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
238  outArg(maxNumGlobalElements));
239  TEUCHOS_TEST_FOR_EXCEPTION
240  (minNumGlobalElements != maxNumGlobalElements ||
241  numGlobalElements != minNumGlobalElements,
242  std::invalid_argument, exPfx << "All processes must "
243  "provide the same number of global elements. Process 0 set "
244  "numGlobalElements="<< proc0NumGlobalElements << ". The "
245  "calling process " << comm->getRank() << " set "
246  "numGlobalElements=" << numGlobalElements << ". The min "
247  "and max values over all processes are "
248  << minNumGlobalElements << " resp. " << maxNumGlobalElements
249  << ".");
250 
251  GO proc0IndexBase = indexBase;
252  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
253  GO minIndexBase = indexBase;
254  GO maxIndexBase = indexBase;
255  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
256  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
257  TEUCHOS_TEST_FOR_EXCEPTION
258  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
259  std::invalid_argument, exPfx << "All processes must "
260  "provide the same indexBase argument. Process 0 set "
261  "indexBase=" << proc0IndexBase << ". The calling process "
262  << comm->getRank() << " set indexBase=" << indexBase
263  << ". The min and max values over all processes are "
264  << minIndexBase << " resp. " << maxIndexBase << ".");
265  }
266 
267  // Distribute the elements across the processes in the given
268  // communicator so that global IDs (GIDs) are
269  //
270  // - Nonoverlapping (only one process owns each GID)
271  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
272  // adjacent GIDs differ by more than one)
273  // - As evenly distributed as possible (the numbers of GIDs on two
274  // different processes do not differ by more than one)
275 
276  // All processes have the same numGlobalElements, but we still
277  // need to check that it is valid. numGlobalElements must be
278  // positive and not the "invalid" value (GSTI).
279  //
280  // This comparison looks funny, but it avoids compiler warnings
281  // for comparing unsigned integers (numGlobalElements_in is a
282  // GST, which is unsigned) while still working if we
283  // later decide to make GST signed.
284  TEUCHOS_TEST_FOR_EXCEPTION(
285  (numGlobalElements < 1 && numGlobalElements != 0),
286  std::invalid_argument, exPfx << "numGlobalElements (= "
287  << numGlobalElements << ") must be nonnegative.");
288 
289  TEUCHOS_TEST_FOR_EXCEPTION
290  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
291  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
292  "Tpetra::global_size_t>::invalid(). This version of the "
293  "constructor requires a valid value of numGlobalElements. "
294  "You probably mistook this constructor for the \"contiguous "
295  "nonuniform\" constructor, which can compute the global "
296  "number of elements for you if you set numGlobalElements to "
297  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
298 
299  size_t numLocalElements = 0; // will set below
300  if (lOrG == GloballyDistributed) {
301  // Compute numLocalElements:
302  //
303  // If numGlobalElements == numProcs * B + remainder,
304  // then Proc r gets B+1 elements if r < remainder,
305  // and B elements if r >= remainder.
306  //
307  // This strategy is valid for any value of numGlobalElements and
308  // numProcs, including the following border cases:
309  // - numProcs == 1
310  // - numLocalElements < numProcs
311  //
312  // In the former case, remainder == 0 && numGlobalElements ==
313  // numLocalElements. In the latter case, remainder ==
314  // numGlobalElements && numLocalElements is either 0 or 1.
315  const GST numProcs = static_cast<GST> (comm_->getSize ());
316  const GST myRank = static_cast<GST> (comm_->getRank ());
317  const GST quotient = numGlobalElements / numProcs;
318  const GST remainder = numGlobalElements - quotient * numProcs;
319 
320  GO startIndex;
321  if (myRank < remainder) {
322  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
323  // myRank was originally an int, so it should never overflow
324  // reasonable GO types.
325  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
326  } else {
327  numLocalElements = as<size_t> (quotient);
328  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
329  as<GO> (remainder);
330  }
331 
332  minMyGID_ = indexBase + startIndex;
333  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
334  minAllGID_ = indexBase;
335  maxAllGID_ = indexBase + numGlobalElements - 1;
336  distributed_ = (numProcs > 1);
337  }
338  else { // lOrG == LocallyReplicated
339  numLocalElements = as<size_t> (numGlobalElements);
340  minMyGID_ = indexBase;
341  maxMyGID_ = indexBase + numGlobalElements - 1;
342  distributed_ = false;
343  }
344 
345  minAllGID_ = indexBase;
346  maxAllGID_ = indexBase + numGlobalElements - 1;
347  indexBase_ = indexBase;
348  numGlobalElements_ = numGlobalElements;
349  numLocalElements_ = numLocalElements;
350  firstContiguousGID_ = minMyGID_;
351  lastContiguousGID_ = maxMyGID_;
352  contiguous_ = true;
353 
354  // Create the Directory on demand in getRemoteIndexList().
355  //setupDirectory ();
356 
357  if (verbose) {
358  std::ostringstream os;
359  os << *prefix << "Done" << endl;
360  std::cerr << os.str();
361  }
362  }
363 
364 
365  template <class LocalOrdinal, class GlobalOrdinal, class Node>
367  Map (const global_size_t numGlobalElements,
368  const size_t numLocalElements,
369  const global_ordinal_type indexBase,
370  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
371  comm_ (comm),
372  uniform_ (false),
373  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
374  {
375  using Teuchos::as;
376  using Teuchos::broadcast;
377  using Teuchos::outArg;
378  using Teuchos::reduceAll;
379  using Teuchos::REDUCE_MIN;
380  using Teuchos::REDUCE_MAX;
381  using Teuchos::REDUCE_SUM;
382  using Teuchos::scan;
383  using std::endl;
384  using GO = global_ordinal_type;
385  using GST = global_size_t;
386  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
387  const char funcName[] =
388  "Map(gblNumInds,lclNumInds,indexBase,comm)";
389  const char exPfx[] =
390  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
391  const char suffix[] =
392  ". Please report this bug to the Tpetra developers.";
393 
394  const bool debug = Details::Behavior::debug("Map");
395  const bool verbose = Details::Behavior::verbose("Map");
396  std::unique_ptr<std::string> prefix;
397  if (verbose) {
398  prefix = Details::createPrefix(
399  comm_.getRawPtr(), "Map", funcName);
400  std::ostringstream os;
401  os << *prefix << "Start" << endl;
402  std::cerr << os.str();
403  }
406 
407  // Global sum of numLocalElements over all processes.
408  // Keep this for later debug checks.
409  GST debugGlobalSum {};
410  if (debug) {
411  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
412  numGlobalElements, numLocalElements, indexBase, comm);
413  }
414 
415  // Distribute the elements across the nodes so that they are
416  // - non-overlapping
417  // - contiguous
418 
419  // This differs from the first Map constructor (that only takes a
420  // global number of elements) in that the user has specified the
421  // number of local elements, so that the elements are not
422  // (necessarily) evenly distributed over the processes.
423 
424  // Compute my local offset. This is an inclusive scan, so to get
425  // the final offset, we subtract off the input.
426  GO scanResult = 0;
427  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
428  const GO myOffset = scanResult - numLocalElements;
429 
430  if (numGlobalElements != GSTI) {
431  numGlobalElements_ = numGlobalElements; // Use the user's value.
432  }
433  else {
434  // Inclusive scan means that the last process has the final sum.
435  // Rather than doing a reduceAll to get the sum of
436  // numLocalElements, we can just have the last process broadcast
437  // its result. That saves us a round of log(numProcs) messages.
438  const int numProcs = comm->getSize ();
439  GST globalSum = scanResult;
440  if (numProcs > 1) {
441  broadcast (*comm, numProcs - 1, outArg (globalSum));
442  }
443  numGlobalElements_ = globalSum;
444 
445  if (debug) {
446  // No need for an all-reduce here; both come from collectives.
447  TEUCHOS_TEST_FOR_EXCEPTION
448  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
449  "globalSum = " << globalSum << " != debugGlobalSum = " <<
450  debugGlobalSum << suffix);
451  }
452  }
453  numLocalElements_ = numLocalElements;
454  indexBase_ = indexBase;
455  minAllGID_ = (numGlobalElements_ == 0) ?
456  std::numeric_limits<GO>::max () :
457  indexBase;
458  maxAllGID_ = (numGlobalElements_ == 0) ?
459  std::numeric_limits<GO>::lowest () :
460  indexBase + GO(numGlobalElements_) - GO(1);
461  minMyGID_ = (numLocalElements_ == 0) ?
462  std::numeric_limits<GO>::max () :
463  indexBase + GO(myOffset);
464  maxMyGID_ = (numLocalElements_ == 0) ?
465  std::numeric_limits<GO>::lowest () :
466  indexBase + myOffset + GO(numLocalElements) - GO(1);
467  firstContiguousGID_ = minMyGID_;
468  lastContiguousGID_ = maxMyGID_;
469  contiguous_ = true;
470  distributed_ = checkIsDist ();
471 
472  // Create the Directory on demand in getRemoteIndexList().
473  //setupDirectory ();
474 
475  if (verbose) {
476  std::ostringstream os;
477  os << *prefix << "Done" << endl;
478  std::cerr << os.str();
479  }
480  }
481 
482  template <class LocalOrdinal, class GlobalOrdinal, class Node>
486  const char errorMessagePrefix[],
487  const global_size_t numGlobalElements,
488  const size_t numLocalElements,
489  const global_ordinal_type indexBase,
490  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
491  {
492  const bool debug = Details::Behavior::debug("Map");
493  if (! debug) {
494  return global_size_t(0);
495  }
496 
497  using Teuchos::broadcast;
498  using Teuchos::outArg;
499  using Teuchos::ptr;
500  using Teuchos::REDUCE_MAX;
501  using Teuchos::REDUCE_MIN;
502  using Teuchos::REDUCE_SUM;
503  using Teuchos::reduceAll;
504  using GO = global_ordinal_type;
505  using GST = global_size_t;
506  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
507 
508  // The user has specified the distribution of indices over the
509  // processes. The distribution is not necessarily contiguous or
510  // equally shared over the processes.
511  //
512  // We assume that the number of local elements can be stored in a
513  // size_t. The instance member numLocalElements_ is a size_t, so
514  // this variable and that should have the same type.
515 
516  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
517  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
518  outArg (debugGlobalSum));
519  // In debug mode only, check whether numGlobalElements and
520  // indexBase are the same over all processes in the communicator.
521  {
522  GST proc0NumGlobalElements = numGlobalElements;
523  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
524  GST minNumGlobalElements = numGlobalElements;
525  GST maxNumGlobalElements = numGlobalElements;
526  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
527  outArg (minNumGlobalElements));
528  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
529  outArg (maxNumGlobalElements));
530  TEUCHOS_TEST_FOR_EXCEPTION
531  (minNumGlobalElements != maxNumGlobalElements ||
532  numGlobalElements != minNumGlobalElements,
533  std::invalid_argument, errorMessagePrefix << "All processes "
534  "must provide the same number of global elements, even if "
535  "that argument is "
536  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
537  "(which signals that the Map should compute the global "
538  "number of elements). Process 0 set numGlobalElements"
539  "=" << proc0NumGlobalElements << ". The calling process "
540  << comm->getRank() << " set numGlobalElements=" <<
541  numGlobalElements << ". The min and max values over all "
542  "processes are " << minNumGlobalElements << " resp. " <<
543  maxNumGlobalElements << ".");
544 
545  GO proc0IndexBase = indexBase;
546  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
547  GO minIndexBase = indexBase;
548  GO maxIndexBase = indexBase;
549  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
550  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
551  TEUCHOS_TEST_FOR_EXCEPTION
552  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
553  std::invalid_argument, errorMessagePrefix <<
554  "All processes must provide the same indexBase argument. "
555  "Process 0 set indexBase = " << proc0IndexBase << ". The "
556  "calling process " << comm->getRank() << " set indexBase="
557  << indexBase << ". The min and max values over all "
558  "processes are " << minIndexBase << " resp. " << maxIndexBase
559  << ".");
560 
561  // Make sure that the sum of numLocalElements over all processes
562  // equals numGlobalElements.
563  TEUCHOS_TEST_FOR_EXCEPTION
564  (numGlobalElements != GSTI &&
565  debugGlobalSum != numGlobalElements, std::invalid_argument,
566  errorMessagePrefix << "The sum of each process' number of "
567  "indices over all processes, " << debugGlobalSum << ", != "
568  << "numGlobalElements=" << numGlobalElements << ". If you "
569  "would like this constructor to compute numGlobalElements "
570  "for you, you may set numGlobalElements="
571  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
572  "on input. Please note that this is NOT necessarily -1.");
573  }
574  return debugGlobalSum;
575  }
576 
577  template <class LocalOrdinal, class GlobalOrdinal, class Node>
578  void
579  Map<LocalOrdinal,GlobalOrdinal,Node>::
580  initWithNonownedHostIndexList(
581  const char errorMessagePrefix[],
582  const global_size_t numGlobalElements,
583  const Kokkos::View<const global_ordinal_type*,
584  Kokkos::LayoutLeft,
585  Kokkos::HostSpace,
586  Kokkos::MemoryUnmanaged>& entryList_host,
587  const global_ordinal_type indexBase,
588  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
589  {
590  Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
591 
592  using Kokkos::LayoutLeft;
593  using Kokkos::subview;
594  using Kokkos::View;
595  using Kokkos::view_alloc;
596  using Kokkos::WithoutInitializing;
597  using Teuchos::as;
598  using Teuchos::broadcast;
599  using Teuchos::outArg;
600  using Teuchos::ptr;
601  using Teuchos::REDUCE_MAX;
602  using Teuchos::REDUCE_MIN;
603  using Teuchos::REDUCE_SUM;
604  using Teuchos::reduceAll;
605  using LO = local_ordinal_type;
606  using GO = global_ordinal_type;
607  using GST = global_size_t;
608  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
609  // Make sure that Kokkos has been initialized (Github Issue #513).
610  TEUCHOS_TEST_FOR_EXCEPTION
611  (! Kokkos::is_initialized (), std::runtime_error,
612  errorMessagePrefix << "The Kokkos execution space "
613  << Teuchos::TypeNameTraits<execution_space>::name()
614  << " has not been initialized. "
615  "Please initialize it before creating a Map.")
616 
617  // The user has specified the distribution of indices over the
618  // processes, via the input array of global indices on each
619  // process. The distribution is not necessarily contiguous or
620  // equally shared over the processes.
621 
622  // The length of the input array on this process is the number of
623  // local indices to associate with this process, even though the
624  // input array contains global indices. We assume that the number
625  // of local indices on a process can be stored in a size_t;
626  // numLocalElements_ is a size_t, so this variable and that should
627  // have the same type.
628  const size_t numLocalElements(entryList_host.size());
629 
630  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
631  numLocalElements, indexBase, comm);
632 
633  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
634  // reduction is redundant, since the directory Map will have to do
635  // the same thing. Thus, we could do the scan and broadcast for
636  // the directory Map here, and give the computed offsets to the
637  // directory Map's constructor. However, a reduction costs less
638  // than a scan and broadcast, so this still saves time if users of
639  // this Map don't ever need the Directory (i.e., if they never
640  // call getRemoteIndexList on this Map).
641  if (numGlobalElements != GSTI) {
642  numGlobalElements_ = numGlobalElements; // Use the user's value.
643  }
644  else { // The user wants us to compute the sum.
645  reduceAll(*comm, REDUCE_SUM,
646  static_cast<GST>(numLocalElements),
647  outArg(numGlobalElements_));
648  }
649 
650  // mfh 20 Feb 2013: We've never quite done the right thing for
651  // duplicate GIDs here. Duplicate GIDs have always been counted
652  // distinctly in numLocalElements_, and thus should get a
653  // different LID. However, we've always used std::map or a hash
654  // table for the GID -> LID lookup table, so distinct GIDs always
655  // map to the same LID. Furthermore, the order of the input GID
656  // list matters, so it's not desirable to sort for determining
657  // uniqueness.
658  //
659  // I've chosen for now to write this code as if the input GID list
660  // contains no duplicates. If this is not desired, we could use
661  // the lookup table itself to determine uniqueness: If we haven't
662  // seen the GID before, it gets a new LID and it's added to the
663  // LID -> GID and GID -> LID tables. If we have seen the GID
664  // before, it doesn't get added to either table. I would
665  // implement this, but it would cost more to do the double lookups
666  // in the table (one to check, and one to insert).
667  //
668  // More importantly, since we build the GID -> LID table in (a
669  // thread-) parallel (way), the order in which duplicate GIDs may
670  // get inserted is not defined. This would make the assignment of
671  // LID to GID nondeterministic.
672 
673  numLocalElements_ = numLocalElements;
674  indexBase_ = indexBase;
675 
676  minMyGID_ = indexBase_;
677  maxMyGID_ = indexBase_;
678 
679  // NOTE (mfh 27 May 2015): While finding the initial contiguous
680  // GID range requires looking at all the GIDs in the range,
681  // dismissing an interval of GIDs only requires looking at the
682  // first and last GIDs. Thus, we could do binary search backwards
683  // from the end in order to catch the common case of a contiguous
684  // interval followed by noncontiguous entries. On the other hand,
685  // we could just expose this case explicitly as yet another Map
686  // constructor, and avoid the trouble of detecting it.
687  if (numLocalElements_ > 0) {
688  // Find contiguous GID range, with the restriction that the
689  // beginning of the range starts with the first entry. While
690  // doing so, fill in the LID -> GID table.
691  typename decltype (lgMap_)::non_const_type lgMap
692  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
693  auto lgMap_host =
694  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
695 
696  // The input array entryList_host is already on host, so we
697  // don't need to take a host view of it.
698  // auto entryList_host =
699  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
700  // Kokkos::deep_copy (entryList_host, entryList);
701 
702  firstContiguousGID_ = entryList_host[0];
703  lastContiguousGID_ = firstContiguousGID_+1;
704 
705  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
706  // anyway, so we have to look at them all. The logical way to
707  // find the first noncontiguous entry would thus be to "reduce,"
708  // where the local reduction result is whether entryList[i] + 1
709  // == entryList[i+1].
710 
711  lgMap_host[0] = firstContiguousGID_;
712  size_t i = 1;
713  for ( ; i < numLocalElements_; ++i) {
714  const GO curGid = entryList_host[i];
715  const LO curLid = as<LO> (i);
716 
717  if (lastContiguousGID_ != curGid) break;
718 
719  // Add the entry to the LID->GID table only after we know that
720  // the current GID is in the initial contiguous sequence, so
721  // that we don't repeat adding it in the first iteration of
722  // the loop below over the remaining noncontiguous GIDs.
723  lgMap_host[curLid] = curGid;
724  ++lastContiguousGID_;
725  }
726  --lastContiguousGID_;
727  // NOTE: i is the first non-contiguous index.
728 
729  // [firstContiguousGID_, lastContigousGID_] is the initial
730  // sequence of contiguous GIDs. We can start the min and max
731  // GID using this range.
732  minMyGID_ = firstContiguousGID_;
733  maxMyGID_ = lastContiguousGID_;
734 
735  // Compute the GID -> LID lookup table, _not_ including the
736  // initial sequence of contiguous GIDs.
737  LO firstNonContiguous_loc=i;
738  {
739 
740  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
741  auto nonContigGids_host = subview (entryList_host, ncRange);
742  TEUCHOS_TEST_FOR_EXCEPTION
743  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
744  static_cast<size_t> (entryList_host.extent (0) - i),
745  std::logic_error, "Tpetra::Map noncontiguous constructor: "
746  "nonContigGids_host.extent(0) = "
747  << nonContigGids_host.extent (0)
748  << " != entryList_host.extent(0) - i = "
749  << (entryList_host.extent (0) - i) << " = "
750  << entryList_host.extent (0) << " - " << i
751  << ". Please report this bug to the Tpetra developers.");
752 
753  // FixedHashTable's constructor expects an owned device View,
754  // so we must deep-copy the subview of the input indices.
755  View<GO*, LayoutLeft, device_type>
756  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
757  nonContigGids_host.size ());
758 
759 
760  // DEEP_COPY REVIEW - HOST-TO-DEVICE
761  Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
762  Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
763 
764  glMap_ = global_to_local_table_type(nonContigGids,
765  firstNonContiguous_loc);
766  // Make host version - when memory spaces match these just do trivial assignment
767  glMapHost_ = global_to_local_table_host_type(glMap_);
768  }
769 
770  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
771  // table above, we have to look at all the (noncontiguous) input
772  // indices anyway. Thus, why not have the constructor compute
773  // and return the min and max?
774 
775  for ( ; i < numLocalElements_; ++i) {
776  const GO curGid = entryList_host[i];
777  const LO curLid = static_cast<LO> (i);
778  lgMap_host[curLid] = curGid; // LID -> GID table
779 
780  // While iterating through entryList, we compute its
781  // (process-local) min and max elements.
782  if (curGid < minMyGID_) {
783  minMyGID_ = curGid;
784  }
785  if (curGid > maxMyGID_) {
786  maxMyGID_ = curGid;
787  }
788  }
789 
790  // We filled lgMap on host above; now sync back to device.
791  // DEEP_COPY REVIEW - HOST-TO-DEVICE
792  Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
793 
794  // "Commit" the local-to-global lookup table we filled in above.
795  lgMap_ = lgMap;
796  // We've already created this, so use it.
797  lgMapHost_ = lgMap_host;
798 
799 
800  }
801  else {
802  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
803  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
804  // This insures tests for GIDs in the range
805  // [firstContiguousGID_, lastContiguousGID_] fail for processes
806  // with no local elements.
807  firstContiguousGID_ = indexBase_+1;
808  lastContiguousGID_ = indexBase_;
809  // glMap_ was default constructed, so it's already empty.
810 
811  }
812 
813 
814 
815 
816  // Compute the min and max of all processes' GIDs. If
817  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
818  // are both indexBase_. This is wrong, but fixing it would
819  // require either a fancy sparse all-reduce, or a custom reduction
820  // operator that ignores invalid values ("invalid" means
821  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
822  //
823  // Also, while we're at it, use the same all-reduce to figure out
824  // if the Map is distributed. "Distributed" means that there is
825  // at least one process with a number of local elements less than
826  // the number of global elements.
827  //
828  // We're computing the min and max of all processes' GIDs using a
829  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
830  // and y are signed). (This lets us combine the min and max into
831  // a single all-reduce.) If each process sets localDist=1 if its
832  // number of local elements is strictly less than the number of
833  // global elements, and localDist=0 otherwise, then a MAX
834  // all-reduce on localDist tells us if the Map is distributed (1
835  // if yes, 0 if no). Thus, we can append localDist onto the end
836  // of the data and get the global result from the all-reduce.
837  if (std::numeric_limits<GO>::is_signed) {
838  // Does my process NOT own all the elements?
839  const GO localDist =
840  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
841 
842  GO minMaxInput[3];
843  minMaxInput[0] = -minMyGID_;
844  minMaxInput[1] = maxMyGID_;
845  minMaxInput[2] = localDist;
846 
847  GO minMaxOutput[3];
848  minMaxOutput[0] = 0;
849  minMaxOutput[1] = 0;
850  minMaxOutput[2] = 0;
851  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
852  minAllGID_ = -minMaxOutput[0];
853  maxAllGID_ = minMaxOutput[1];
854  const GO globalDist = minMaxOutput[2];
855  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
856  }
857  else { // unsigned; use two reductions
858  // This is always correct, no matter the signedness of GO.
859  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
860  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
861  distributed_ = checkIsDist ();
862  }
863 
864  contiguous_ = false; // "Contiguous" is conservative.
865 
866  TEUCHOS_TEST_FOR_EXCEPTION(
867  minAllGID_ < indexBase_,
868  std::invalid_argument,
869  "Tpetra::Map constructor (noncontiguous): "
870  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
871  "less than the given indexBase = " << indexBase_ << ".");
872 
873  // Create the Directory on demand in getRemoteIndexList().
874  //setupDirectory ();
875  }
876 
877  template <class LocalOrdinal, class GlobalOrdinal, class Node>
879  Map (const global_size_t numGlobalElements,
880  const GlobalOrdinal indexList[],
881  const LocalOrdinal indexListSize,
882  const GlobalOrdinal indexBase,
883  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
884  comm_ (comm),
885  uniform_ (false),
886  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
887  {
888  using std::endl;
889  const char funcName[] =
890  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
891 
892  const bool verbose = Details::Behavior::verbose("Map");
893  std::unique_ptr<std::string> prefix;
894  if (verbose) {
895  prefix = Details::createPrefix(
896  comm_.getRawPtr(), "Map", funcName);
897  std::ostringstream os;
898  os << *prefix << "Start" << endl;
899  std::cerr << os.str();
900  }
904  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
905  indexList, static_cast<size_t> (indexListSize),
906  comm.getRawPtr ());
907  // Not quite sure if I trust all code to behave correctly if the
908  // pointer is nonnull but the array length is nonzero, so I'll
909  // make sure the raw pointer is null if the length is zero.
910  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
911  Kokkos::View<const GlobalOrdinal*,
912  Kokkos::LayoutLeft,
913  Kokkos::HostSpace,
914  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
915  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
916  indexBase, comm);
917  if (verbose) {
918  std::ostringstream os;
919  os << *prefix << "Done" << endl;
920  std::cerr << os.str();
921  }
922  }
923 
924  template <class LocalOrdinal, class GlobalOrdinal, class Node>
926  Map (const global_size_t numGlobalElements,
927  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
928  const GlobalOrdinal indexBase,
929  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
930  comm_ (comm),
931  uniform_ (false),
932  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
933  {
934  using std::endl;
935  const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
936 
937  const bool verbose = Details::Behavior::verbose("Map");
938  std::unique_ptr<std::string> prefix;
939  if (verbose) {
940  prefix = Details::createPrefix(
941  comm_.getRawPtr(), "Map", funcName);
942  std::ostringstream os;
943  os << *prefix << "Start" << endl;
944  std::cerr << os.str();
945  }
949  const size_t numLclInds = static_cast<size_t> (entryList.size ());
950  checkMapInputArray ("(GST, ArrayView, GO, comm)",
951  entryList.getRawPtr (), numLclInds,
952  comm.getRawPtr ());
953  // Not quite sure if I trust both ArrayView and View to behave
954  // correctly if the pointer is nonnull but the array length is
955  // nonzero, so I'll make sure it's null if the length is zero.
956  const GlobalOrdinal* const indsRaw =
957  numLclInds == 0 ? NULL : entryList.getRawPtr ();
958  Kokkos::View<const GlobalOrdinal*,
959  Kokkos::LayoutLeft,
960  Kokkos::HostSpace,
961  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
962  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
963  indexBase, comm);
964  if (verbose) {
965  std::ostringstream os;
966  os << *prefix << "Done" << endl;
967  std::cerr << os.str();
968  }
969  }
970 
971 
972  template <class LocalOrdinal, class GlobalOrdinal, class Node>
974  Map (const global_size_t numGlobalElements,
975  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
976  const GlobalOrdinal indexBase,
977  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
978  comm_ (comm),
979  uniform_ (false),
980  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
981  {
982  using Kokkos::LayoutLeft;
983  using Kokkos::subview;
984  using Kokkos::View;
985  using Kokkos::view_alloc;
986  using Kokkos::WithoutInitializing;
987  using Teuchos::arcp;
988  using Teuchos::ArrayView;
989  using Teuchos::as;
990  using Teuchos::broadcast;
991  using Teuchos::outArg;
992  using Teuchos::ptr;
993  using Teuchos::REDUCE_MAX;
994  using Teuchos::REDUCE_MIN;
995  using Teuchos::REDUCE_SUM;
996  using Teuchos::reduceAll;
997  using Teuchos::typeName;
998  using std::endl;
999  using LO = local_ordinal_type;
1000  using GO = global_ordinal_type;
1001  using GST = global_size_t;
1002  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1003  const char funcName[] =
1004  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
1005 
1006  const bool verbose = Details::Behavior::verbose("Map");
1007  std::unique_ptr<std::string> prefix;
1008  if (verbose) {
1009  prefix = Details::createPrefix(
1010  comm_.getRawPtr(), "Map", funcName);
1011  std::ostringstream os;
1012  os << *prefix << "Start" << endl;
1013  std::cerr << os.str();
1014  }
1017  Tpetra::Details::ProfilingRegion pr(funcName);
1018  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
1019  entryList.data (),
1020  static_cast<size_t> (entryList.extent (0)),
1021  comm.getRawPtr ());
1022 
1023  // The user has specified the distribution of indices over the
1024  // processes, via the input array of global indices on each
1025  // process. The distribution is not necessarily contiguous or
1026  // equally shared over the processes.
1027 
1028  // The length of the input array on this process is the number of
1029  // local indices to associate with this process, even though the
1030  // input array contains global indices. We assume that the number
1031  // of local indices on a process can be stored in a size_t;
1032  // numLocalElements_ is a size_t, so this variable and that should
1033  // have the same type.
1034  const size_t numLocalElements(entryList.size());
1035 
1036  initialNonuniformDebugCheck(funcName, numGlobalElements,
1037  numLocalElements, indexBase, comm);
1038 
1039  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
1040  // reduction is redundant, since the directory Map will have to do
1041  // the same thing. Thus, we could do the scan and broadcast for
1042  // the directory Map here, and give the computed offsets to the
1043  // directory Map's constructor. However, a reduction costs less
1044  // than a scan and broadcast, so this still saves time if users of
1045  // this Map don't ever need the Directory (i.e., if they never
1046  // call getRemoteIndexList on this Map).
1047  if (numGlobalElements != GSTI) {
1048  numGlobalElements_ = numGlobalElements; // Use the user's value.
1049  }
1050  else { // The user wants us to compute the sum.
1051  reduceAll(*comm, REDUCE_SUM,
1052  static_cast<GST>(numLocalElements),
1053  outArg(numGlobalElements_));
1054  }
1055 
1056 
1057  // mfh 20 Feb 2013: We've never quite done the right thing for
1058  // duplicate GIDs here. Duplicate GIDs have always been counted
1059  // distinctly in numLocalElements_, and thus should get a
1060  // different LID. However, we've always used std::map or a hash
1061  // table for the GID -> LID lookup table, so distinct GIDs always
1062  // map to the same LID. Furthermore, the order of the input GID
1063  // list matters, so it's not desirable to sort for determining
1064  // uniqueness.
1065  //
1066  // I've chosen for now to write this code as if the input GID list
1067  // contains no duplicates. If this is not desired, we could use
1068  // the lookup table itself to determine uniqueness: If we haven't
1069  // seen the GID before, it gets a new LID and it's added to the
1070  // LID -> GID and GID -> LID tables. If we have seen the GID
1071  // before, it doesn't get added to either table. I would
1072  // implement this, but it would cost more to do the double lookups
1073  // in the table (one to check, and one to insert).
1074  //
1075  // More importantly, since we build the GID -> LID table in (a
1076  // thread-) parallel (way), the order in which duplicate GIDs may
1077  // get inserted is not defined. This would make the assignment of
1078  // LID to GID nondeterministic.
1079 
1080  numLocalElements_ = numLocalElements;
1081  indexBase_ = indexBase;
1082 
1083  minMyGID_ = indexBase_;
1084  maxMyGID_ = indexBase_;
1085 
1086  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1087  // GID range requires looking at all the GIDs in the range,
1088  // dismissing an interval of GIDs only requires looking at the
1089  // first and last GIDs. Thus, we could do binary search backwards
1090  // from the end in order to catch the common case of a contiguous
1091  // interval followed by noncontiguous entries. On the other hand,
1092  // we could just expose this case explicitly as yet another Map
1093  // constructor, and avoid the trouble of detecting it.
1094  if (numLocalElements_ > 0) {
1095  // Find contiguous GID range, with the restriction that the
1096  // beginning of the range starts with the first entry. While
1097  // doing so, fill in the LID -> GID table.
1098  typename decltype (lgMap_)::non_const_type lgMap
1099  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1100 
1101  // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1102  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1103  Kokkos::deep_copy(typename device_type::execution_space(),lgMap,entryList);
1104  LO lastContiguousGID_loc;
1105  computeConstantsOnDevice(entryList,minMyGID_,maxMyGID_,firstContiguousGID_,lastContiguousGID_,lastContiguousGID_loc);
1106  LO firstNonContiguous_loc = lastContiguousGID_loc+1;
1107  auto nonContigGids = Kokkos::subview(entryList,std::pair<size_t,size_t>(firstNonContiguous_loc,entryList.extent(0)));
1108 
1109  // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1110  glMap_ = global_to_local_table_type(nonContigGids,
1111  firstNonContiguous_loc);
1112 
1113  // "Commit" the local-to-global lookup table we filled in above.
1114  lgMap_ = lgMap;
1115 
1116  }
1117  else {
1118  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1119  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1120  // This insures tests for GIDs in the range
1121  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1122  // with no local elements.
1123  firstContiguousGID_ = indexBase_+1;
1124  lastContiguousGID_ = indexBase_;
1125  // glMap_ was default constructed, so it's already empty.
1126  }
1127 
1128 
1129  // Compute the min and max of all processes' GIDs. If
1130  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1131  // are both indexBase_. This is wrong, but fixing it would
1132  // require either a fancy sparse all-reduce, or a custom reduction
1133  // operator that ignores invalid values ("invalid" means
1134  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1135  //
1136  // Also, while we're at it, use the same all-reduce to figure out
1137  // if the Map is distributed. "Distributed" means that there is
1138  // at least one process with a number of local elements less than
1139  // the number of global elements.
1140  //
1141  // We're computing the min and max of all processes' GIDs using a
1142  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1143  // and y are signed). (This lets us combine the min and max into
1144  // a single all-reduce.) If each process sets localDist=1 if its
1145  // number of local elements is strictly less than the number of
1146  // global elements, and localDist=0 otherwise, then a MAX
1147  // all-reduce on localDist tells us if the Map is distributed (1
1148  // if yes, 0 if no). Thus, we can append localDist onto the end
1149  // of the data and get the global result from the all-reduce.
1150  if (std::numeric_limits<GO>::is_signed) {
1151  // Does my process NOT own all the elements?
1152  const GO localDist =
1153  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1154 
1155  GO minMaxInput[3];
1156  minMaxInput[0] = -minMyGID_;
1157  minMaxInput[1] = maxMyGID_;
1158  minMaxInput[2] = localDist;
1159 
1160  GO minMaxOutput[3];
1161  minMaxOutput[0] = 0;
1162  minMaxOutput[1] = 0;
1163  minMaxOutput[2] = 0;
1164  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1165  minAllGID_ = -minMaxOutput[0];
1166  maxAllGID_ = minMaxOutput[1];
1167  const GO globalDist = minMaxOutput[2];
1168  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1169  }
1170  else { // unsigned; use two reductions
1171  // This is always correct, no matter the signedness of GO.
1172  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1173  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1174  distributed_ = checkIsDist ();
1175  }
1176 
1177  contiguous_ = false; // "Contiguous" is conservative.
1178 
1179  TEUCHOS_TEST_FOR_EXCEPTION(
1180  minAllGID_ < indexBase_,
1181  std::invalid_argument,
1182  "Tpetra::Map constructor (noncontiguous): "
1183  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1184  "less than the given indexBase = " << indexBase_ << ".");
1185 
1186  // Create the Directory on demand in getRemoteIndexList().
1187  //setupDirectory ();
1188 
1189  if (verbose) {
1190  std::ostringstream os;
1191  os << *prefix << "Done" << endl;
1192  std::cerr << os.str();
1193  }
1194  }
1195 
1196 
1197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1199  {
1200  if (! Kokkos::is_initialized ()) {
1201  std::ostringstream os;
1202  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1203  "Kokkos::finalize() has been called. This is user error! There are "
1204  "two likely causes: " << std::endl <<
1205  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1206  << std::endl <<
1207  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1208  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1209  "or Tpetra::finalize()." << std::endl << std::endl <<
1210  "Don't do either of these! Please refer to GitHib Issue #2372."
1211  << std::endl;
1212  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1213  this->getComm ().getRawPtr ());
1214  }
1215  else {
1216  using ::Tpetra::Details::mpiIsInitialized;
1217  using ::Tpetra::Details::mpiIsFinalized;
1218  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1219 
1220  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1221  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1222  mpiIsInitialized () && mpiIsFinalized ()) {
1223  // Tpetra itself does not require MPI, even if building with
1224  // MPI. It is legal to create Tpetra objects that do not use
1225  // MPI, even in an MPI program. However, calling Tpetra stuff
1226  // after MPI_Finalize() has been called is a bad idea, since
1227  // some Tpetra defaults may use MPI if available.
1228  std::ostringstream os;
1229  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1230  "MPI_Finalize() has been called. This is user error! There are "
1231  "two likely causes: " << std::endl <<
1232  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1233  << std::endl <<
1234  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1235  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1236  "Tpetra::finalize()." << std::endl << std::endl <<
1237  "Don't do either of these! Please refer to GitHib Issue #2372."
1238  << std::endl;
1239  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1240  }
1241  }
1242  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1243  // because Tpetra does not yet require Tpetra::initialize /
1244  // Tpetra::finalize.
1245  }
1246 
1247  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1248  bool
1250  {
1251  TEUCHOS_TEST_FOR_EXCEPTION(
1252  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1253  "getComm() returns null. Please report this bug to the Tpetra "
1254  "developers.");
1255 
1256  // This is a collective operation, if it hasn't been called before.
1257  setupDirectory ();
1258  return directory_->isOneToOne (*this);
1259  }
1260 
1261  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1262  LocalOrdinal
1264  getLocalElement (GlobalOrdinal globalIndex) const
1265  {
1266  if (isContiguous ()) {
1267  if (globalIndex < getMinGlobalIndex () ||
1268  globalIndex > getMaxGlobalIndex ()) {
1269  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1270  }
1271  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1272  }
1273  else if (globalIndex >= firstContiguousGID_ &&
1274  globalIndex <= lastContiguousGID_) {
1275  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1276  }
1277  else {
1278  // If the given global index is not in the table, this returns
1279  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1280  // glMapHost_ is Host and does not assume UVM
1281  lazyPushToHost();
1282  return glMapHost_.get (globalIndex);
1283  }
1284  }
1285 
1286  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1287  GlobalOrdinal
1289  getGlobalElement (LocalOrdinal localIndex) const
1290  {
1291  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1292  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1293  }
1294  if (isContiguous ()) {
1295  return getMinGlobalIndex () + localIndex;
1296  }
1297  else {
1298  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1299  // involvement. As a result, it is thread safe.
1300  //
1301  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1302  lazyPushToHost();
1303  return lgMapHost_[localIndex];
1304  }
1305  }
1306 
1307  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1308  bool
1310  isNodeLocalElement (LocalOrdinal localIndex) const
1311  {
1312  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1313  return false;
1314  } else {
1315  return true;
1316  }
1317  }
1318 
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  bool
1322  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1323  return this->getLocalElement (globalIndex) !=
1324  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1325  }
1326 
1327  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1329  return uniform_;
1330  }
1331 
1332  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1334  return contiguous_;
1335  }
1336 
1337 
1338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1342  {
1343  return local_map_type (glMap_, lgMap_, getIndexBase (),
1344  getMinGlobalIndex (), getMaxGlobalIndex (),
1345  firstContiguousGID_, lastContiguousGID_,
1346  getLocalNumElements (), isContiguous ());
1347  }
1348 
1349  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1350  bool
1353  {
1354  using Teuchos::outArg;
1355  using Teuchos::REDUCE_MIN;
1356  using Teuchos::reduceAll;
1357  //
1358  // Tests that avoid the Boolean all-reduce below by using
1359  // globally consistent quantities.
1360  //
1361  if (this == &map) {
1362  // Pointer equality on one process always implies pointer
1363  // equality on all processes, since Map is immutable.
1364  return true;
1365  }
1366  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1367  // The two communicators have different numbers of processes.
1368  // It's not correct to call isCompatible() in that case. This
1369  // may result in the all-reduce hanging below.
1370  return false;
1371  }
1372  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1373  // Two Maps are definitely NOT compatible if they have different
1374  // global numbers of indices.
1375  return false;
1376  }
1377  else if (isContiguous () && isUniform () &&
1378  map.isContiguous () && map.isUniform ()) {
1379  // Contiguous uniform Maps with the same number of processes in
1380  // their communicators, and with the same global numbers of
1381  // indices, are always compatible.
1382  return true;
1383  }
1384  else if (! isContiguous () && ! map.isContiguous () &&
1385  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1386  lgMap_.data () == map.lgMap_.data ()) {
1387  // Noncontiguous Maps whose global index lists are nonempty and
1388  // have the same pointer must be the same (and therefore
1389  // contiguous).
1390  //
1391  // Nonempty is important. For example, consider a communicator
1392  // with two processes, and two Maps that share this
1393  // communicator, with zero global indices on the first process,
1394  // and different nonzero numbers of global indices on the second
1395  // process. In that case, on the first process, the pointers
1396  // would both be NULL.
1397  return true;
1398  }
1399 
1400  TEUCHOS_TEST_FOR_EXCEPTION(
1401  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1402  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1403  "checked that this condition is true above, but it's false here. "
1404  "Please report this bug to the Tpetra developers.");
1405 
1406  // Do both Maps have the same number of indices on each process?
1407  const int locallyCompat =
1408  (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1409 
1410  int globallyCompat = 0;
1411  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1412  return (globallyCompat == 1);
1413  }
1414 
1415  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1416  bool
1419  {
1420  using Teuchos::ArrayView;
1421  using GO = global_ordinal_type;
1422  using size_type = typename ArrayView<const GO>::size_type;
1423 
1424  // If both Maps are contiguous, we can compare their GID ranges
1425  // easily by looking at the min and max GID on this process.
1426  // Otherwise, we'll compare their GID lists. If only one Map is
1427  // contiguous, then we only have to call getLocalElementList() on
1428  // the noncontiguous Map. (It's best to avoid calling it on a
1429  // contiguous Map, since it results in unnecessary storage that
1430  // persists for the lifetime of the Map.)
1431 
1432  if (this == &map) {
1433  // Pointer equality on one process always implies pointer
1434  // equality on all processes, since Map is immutable.
1435  return true;
1436  }
1437  else if (getLocalNumElements () != map.getLocalNumElements ()) {
1438  return false;
1439  }
1440  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1441  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1442  return false;
1443  }
1444  else {
1445  if (isContiguous ()) {
1446  if (map.isContiguous ()) {
1447  return true; // min and max match, so the ranges match.
1448  }
1449  else { // *this is contiguous, but map is not contiguous
1450  TEUCHOS_TEST_FOR_EXCEPTION(
1451  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1452  "Tpetra::Map::locallySameAs: BUG");
1453  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1454  const GO minLhsGid = this->getMinGlobalIndex ();
1455  const size_type numRhsElts = rhsElts.size ();
1456  for (size_type k = 0; k < numRhsElts; ++k) {
1457  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1458  if (curLhsGid != rhsElts[k]) {
1459  return false; // stop on first mismatch
1460  }
1461  }
1462  return true;
1463  }
1464  }
1465  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1466  TEUCHOS_TEST_FOR_EXCEPTION(
1467  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1468  "Tpetra::Map::locallySameAs: BUG");
1469  ArrayView<const GO> lhsElts = this->getLocalElementList ();
1470  const GO minRhsGid = map.getMinGlobalIndex ();
1471  const size_type numLhsElts = lhsElts.size ();
1472  for (size_type k = 0; k < numLhsElts; ++k) {
1473  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1474  if (curRhsGid != lhsElts[k]) {
1475  return false; // stop on first mismatch
1476  }
1477  }
1478  return true;
1479  }
1480  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1481  // Pointers to LID->GID "map" (actually just an array) are the
1482  // same, and the number of GIDs are the same.
1483  return this->getLocalNumElements () == map.getLocalNumElements ();
1484  }
1485  else { // we actually have to compare the GIDs
1486  if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1487  return false; // We already checked above, but check just in case
1488  }
1489  else {
1490  ArrayView<const GO> lhsElts = getLocalElementList ();
1491  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1492 
1493  // std::equal requires that the latter range is as large as
1494  // the former. We know the ranges have equal length, because
1495  // they have the same number of local entries.
1496  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1497  }
1498  }
1499  }
1500  }
1501 
1502  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1503  bool
1506  {
1507  if (this == &map)
1508  return true;
1509 
1510  // We are going to check if lmap1 is fitted into lmap2:
1511  // Is lmap1 (map) a subset of lmap2 (this)?
1512  // And do the first lmap1.getLocalNumElements() global elements
1513  // of lmap1,lmap2 owned on each process exactly match?
1514  auto lmap1 = map.getLocalMap();
1515  auto lmap2 = this->getLocalMap();
1516 
1517  auto numLocalElements1 = lmap1.getLocalNumElements();
1518  auto numLocalElements2 = lmap2.getLocalNumElements();
1519 
1520  if (numLocalElements1 > numLocalElements2) {
1521  // There are more indices in the first map on this process than in second map.
1522  return false;
1523  }
1524 
1525  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1526  // When both Maps are contiguous, just check the interval inclusion.
1527  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1528  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1529  }
1530 
1531  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1532  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1533  // The second map does not include the first map bounds, and thus some of
1534  // the first map global indices are not in the second map.
1535  return false;
1536  }
1537 
1538  using LO = local_ordinal_type;
1539  using range_type =
1540  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1541 
1542  // Check all elements.
1543  LO numDiff = 0;
1544  Kokkos::parallel_reduce(
1545  "isLocallyFitted",
1546  range_type(0, numLocalElements1),
1547  KOKKOS_LAMBDA (const LO i, LO& diff) {
1548  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1549  }, numDiff);
1550 
1551  return (numDiff == 0);
1552  }
1553 
1554  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1555  bool
1558  {
1559  using Teuchos::outArg;
1560  using Teuchos::REDUCE_MIN;
1561  using Teuchos::reduceAll;
1562  //
1563  // Tests that avoid the Boolean all-reduce below by using
1564  // globally consistent quantities.
1565  //
1566  if (this == &map) {
1567  // Pointer equality on one process always implies pointer
1568  // equality on all processes, since Map is immutable.
1569  return true;
1570  }
1571  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1572  // The two communicators have different numbers of processes.
1573  // It's not correct to call isSameAs() in that case. This
1574  // may result in the all-reduce hanging below.
1575  return false;
1576  }
1577  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1578  // Two Maps are definitely NOT the same if they have different
1579  // global numbers of indices.
1580  return false;
1581  }
1582  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1583  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1584  getIndexBase () != map.getIndexBase ()) {
1585  // If the global min or max global index doesn't match, or if
1586  // the index base doesn't match, then the Maps aren't the same.
1587  return false;
1588  }
1589  else if (isDistributed () != map.isDistributed ()) {
1590  // One Map is distributed and the other is not, which means that
1591  // the Maps aren't the same.
1592  return false;
1593  }
1594  else if (isContiguous () && isUniform () &&
1595  map.isContiguous () && map.isUniform ()) {
1596  // Contiguous uniform Maps with the same number of processes in
1597  // their communicators, with the same global numbers of indices,
1598  // and with matching index bases and ranges, must be the same.
1599  return true;
1600  }
1601 
1602  // The two communicators must have the same number of processes,
1603  // with process ranks occurring in the same order. This uses
1604  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1605  // "Operations that access communicators are local and their
1606  // execution does not require interprocess communication."
1607  // However, just to be sure, I'll put this call after the above
1608  // tests that don't communicate.
1609  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1610  return false;
1611  }
1612 
1613  // If we get this far, we need to check local properties and then
1614  // communicate local sameness across all processes.
1615  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1616 
1617  // Return true if and only if all processes report local sameness.
1618  int isSame_gbl = 0;
1619  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1620  return isSame_gbl == 1;
1621  }
1622 
1623  namespace { // (anonymous)
1624  template <class LO, class GO, class DT>
1625  class FillLgMap {
1626  public:
1627  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1628  const GO startGid) :
1629  lgMap_ (lgMap), startGid_ (startGid)
1630  {
1631  Kokkos::RangePolicy<LO, typename DT::execution_space>
1632  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1633  Kokkos::parallel_for (range, *this);
1634  }
1635 
1636  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1637  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1638  }
1639 
1640  private:
1641  const Kokkos::View<GO*, DT> lgMap_;
1642  const GO startGid_;
1643  };
1644 
1645  } // namespace (anonymous)
1646 
1647 
1648  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1649  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1651  {
1652  using std::endl;
1653  using LO = local_ordinal_type;
1654  using GO = global_ordinal_type;
1655  using const_lg_view_type = decltype(lgMap_);
1656  using lg_view_type = typename const_lg_view_type::non_const_type;
1657  const bool debug = Details::Behavior::debug("Map");
1658  const bool verbose = Details::Behavior::verbose("Map");
1659 
1660  std::unique_ptr<std::string> prefix;
1661  if (verbose) {
1662  prefix = Details::createPrefix(
1663  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1664  std::ostringstream os;
1665  os << *prefix << "Start" << endl;
1666  std::cerr << os.str();
1667  }
1668 
1669  // If the local-to-global mapping doesn't exist yet, and if we
1670  // have local entries, then create and fill the local-to-global
1671  // mapping.
1672  const bool needToCreateLocalToGlobalMapping =
1673  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1674 
1675  if (needToCreateLocalToGlobalMapping) {
1676  if (verbose) {
1677  std::ostringstream os;
1678  os << *prefix << "Need to create lgMap" << endl;
1679  std::cerr << os.str();
1680  }
1681  if (debug) {
1682  // The local-to-global mapping should have been set up already
1683  // for a noncontiguous map.
1684  TEUCHOS_TEST_FOR_EXCEPTION
1685  (! isContiguous(), std::logic_error,
1686  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1687  "mapping (lgMap_) should have been set up already for a "
1688  "noncontiguous Map. Please report this bug to the Tpetra "
1689  "developers.");
1690  }
1691  const LO numElts = static_cast<LO> (getLocalNumElements ());
1692 
1693  using Kokkos::view_alloc;
1694  using Kokkos::WithoutInitializing;
1695  lg_view_type lgMap ("lgMap", numElts);
1696  if (verbose) {
1697  std::ostringstream os;
1698  os << *prefix << "Fill lgMap" << endl;
1699  std::cerr << os.str();
1700  }
1701  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1702 
1703  if (verbose) {
1704  std::ostringstream os;
1705  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1706  std::cerr << os.str();
1707  }
1708 
1709  auto lgMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1710  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1711  auto exec_instance = execution_space();
1712  Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1713 
1714  // There's a non-trivial chance we'll grab this on the host,
1715  // so let's make sure the copy finishes
1716  exec_instance.fence();
1717 
1718  // "Commit" the local-to-global lookup table we filled in above.
1719  lgMap_ = lgMap;
1720  lgMapHost_ = lgMapHost;
1721  }
1722  else {
1723  lazyPushToHost();
1724  }
1725 
1726  if (verbose) {
1727  std::ostringstream os;
1728  os << *prefix << "Done" << endl;
1729  std::cerr << os.str();
1730  }
1731  return lgMapHost_;
1732  }
1733 
1734 
1735  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1736  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_device_type
1738  {
1739  using std::endl;
1740  using LO = local_ordinal_type;
1741  using GO = global_ordinal_type;
1742  using const_lg_view_type = decltype(lgMap_);
1743  using lg_view_type = typename const_lg_view_type::non_const_type;
1744  const bool debug = Details::Behavior::debug("Map");
1745  const bool verbose = Details::Behavior::verbose("Map");
1746 
1747  std::unique_ptr<std::string> prefix;
1748  if (verbose) {
1749  prefix = Details::createPrefix(
1750  comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1751  std::ostringstream os;
1752  os << *prefix << "Start" << endl;
1753  std::cerr << os.str();
1754  }
1755 
1756  // If the local-to-global mapping doesn't exist yet, and if we
1757  // have local entries, then create and fill the local-to-global
1758  // mapping.
1759  const bool needToCreateLocalToGlobalMapping =
1760  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1761 
1762  if (needToCreateLocalToGlobalMapping) {
1763  if (verbose) {
1764  std::ostringstream os;
1765  os << *prefix << "Need to create lgMap" << endl;
1766  std::cerr << os.str();
1767  }
1768  if (debug) {
1769  // The local-to-global mapping should have been set up already
1770  // for a noncontiguous map.
1771  TEUCHOS_TEST_FOR_EXCEPTION
1772  (! isContiguous(), std::logic_error,
1773  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1774  "mapping (lgMap_) should have been set up already for a "
1775  "noncontiguous Map. Please report this bug to the Tpetra "
1776  "developers.");
1777  }
1778  const LO numElts = static_cast<LO> (getLocalNumElements ());
1779 
1780  using Kokkos::view_alloc;
1781  using Kokkos::WithoutInitializing;
1782  lg_view_type lgMap ("lgMap", numElts);
1783  if (verbose) {
1784  std::ostringstream os;
1785  os << *prefix << "Fill lgMap" << endl;
1786  std::cerr << os.str();
1787  }
1788  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1789 
1790  // "Commit" the local-to-global lookup table we filled in above.
1791  lgMap_ = lgMap;
1792  }
1793 
1794  if (verbose) {
1795  std::ostringstream os;
1796  os << *prefix << "Done" << endl;
1797  std::cerr << os.str();
1798  }
1799  return lgMap_;
1800  }
1801 
1802 
1803 
1804  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1805  Teuchos::ArrayView<const GlobalOrdinal>
1807  {
1808  using GO = global_ordinal_type;
1809 
1810  // If the local-to-global mapping doesn't exist yet, and if we
1811  // have local entries, then create and fill the local-to-global
1812  // mapping.
1813  (void) this->getMyGlobalIndices ();
1814 
1815  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1816  lazyPushToHost();
1817  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1818  // The third argument forces ArrayView not to try to track memory
1819  // in a debug build. We have to use it because the memory does
1820  // not belong to a Teuchos memory management class.
1821  return Teuchos::ArrayView<const GO>(
1822  lgMapHostRawPtr,
1823  lgMapHost_.extent (0),
1824  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1825  }
1826 
1827  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1829  return distributed_;
1830  }
1831 
1832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1834  using Teuchos::TypeNameTraits;
1835  std::ostringstream os;
1836 
1837  os << "Tpetra::Map: {"
1838  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1839  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1840  << ", NodeType: " << TypeNameTraits<Node>::name ();
1841  if (this->getObjectLabel () != "") {
1842  os << ", Label: \"" << this->getObjectLabel () << "\"";
1843  }
1844  os << ", Global number of entries: " << getGlobalNumElements ()
1845  << ", Number of processes: " << getComm ()->getSize ()
1846  << ", Uniform: " << (isUniform () ? "true" : "false")
1847  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1848  << ", Distributed: " << (isDistributed () ? "true" : "false")
1849  << "}";
1850  return os.str ();
1851  }
1852 
1857  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1858  std::string
1860  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1861  {
1862  using LO = local_ordinal_type;
1863  using std::endl;
1864 
1865  // This preserves current behavior of Map.
1866  if (vl < Teuchos::VERB_HIGH) {
1867  return std::string ();
1868  }
1869  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1870  Teuchos::RCP<Teuchos::FancyOStream> outp =
1871  Teuchos::getFancyOStream (outStringP);
1872  Teuchos::FancyOStream& out = *outp;
1873 
1874  auto comm = this->getComm ();
1875  const int myRank = comm->getRank ();
1876  const int numProcs = comm->getSize ();
1877  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1878  Teuchos::OSTab tab1 (out);
1879 
1880  const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1881  out << "My number of entries: " << numEnt << endl
1882  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1883  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1884 
1885  if (vl == Teuchos::VERB_EXTREME) {
1886  out << "My global indices: [";
1887  const LO minLclInd = this->getMinLocalIndex ();
1888  for (LO k = 0; k < numEnt; ++k) {
1889  out << minLclInd + this->getGlobalElement (k);
1890  if (k + 1 < numEnt) {
1891  out << ", ";
1892  }
1893  }
1894  out << "]" << endl;
1895  }
1896 
1897  out.flush (); // make sure the ostringstream got everything
1898  return outStringP->str ();
1899  }
1900 
1901  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1902  void
1904  describe (Teuchos::FancyOStream &out,
1905  const Teuchos::EVerbosityLevel verbLevel) const
1906  {
1907  using Teuchos::TypeNameTraits;
1908  using Teuchos::VERB_DEFAULT;
1909  using Teuchos::VERB_NONE;
1910  using Teuchos::VERB_LOW;
1911  using Teuchos::VERB_HIGH;
1912  using std::endl;
1913  using LO = local_ordinal_type;
1914  using GO = global_ordinal_type;
1915  const Teuchos::EVerbosityLevel vl =
1916  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1917 
1918  if (vl == VERB_NONE) {
1919  return; // don't print anything
1920  }
1921  // If this Map's Comm is null, then the Map does not participate
1922  // in collective operations with the other processes. In that
1923  // case, it is not even legal to call this method. The reasonable
1924  // thing to do in that case is nothing.
1925  auto comm = this->getComm ();
1926  if (comm.is_null ()) {
1927  return;
1928  }
1929  const int myRank = comm->getRank ();
1930  const int numProcs = comm->getSize ();
1931 
1932  // Only Process 0 should touch the output stream, but this method
1933  // in general may need to do communication. Thus, we may need to
1934  // preserve the current tab level across multiple "if (myRank ==
1935  // 0) { ... }" inner scopes. This is why we sometimes create
1936  // OSTab instances by pointer, instead of by value. We only need
1937  // to create them by pointer if the tab level must persist through
1938  // multiple inner scopes.
1939  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1940 
1941  if (myRank == 0) {
1942  // At every verbosity level but VERB_NONE, Process 0 prints.
1943  // By convention, describe() always begins with a tab before
1944  // printing.
1945  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1946  out << "\"Tpetra::Map\":" << endl;
1947  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1948  {
1949  out << "Template parameters:" << endl;
1950  Teuchos::OSTab tab2 (out);
1951  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1952  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1953  << "Node: " << TypeNameTraits<Node>::name () << endl;
1954  }
1955  const std::string label = this->getObjectLabel ();
1956  if (label != "") {
1957  out << "Label: \"" << label << "\"" << endl;
1958  }
1959  out << "Global number of entries: " << getGlobalNumElements () << endl
1960  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1961  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1962  << "Index base: " << getIndexBase () << endl
1963  << "Number of processes: " << numProcs << endl
1964  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1965  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1966  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1967  }
1968 
1969  // This is collective over the Map's communicator.
1970  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1971  const std::string lclStr = this->localDescribeToString (vl);
1972  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1973  }
1974  }
1975 
1976  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1977  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1979  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1980  {
1981  using Teuchos::RCP;
1982  using Teuchos::rcp;
1983  using GST = global_size_t;
1984  using LO = local_ordinal_type;
1985  using GO = global_ordinal_type;
1986  using map_type = Map<LO, GO, Node>;
1987 
1988  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1989  // the Map by calling its ordinary public constructor, using the
1990  // original Map's data. This only involves O(1) all-reduces over
1991  // the new communicator, which in the common case only includes a
1992  // small number of processes.
1993 
1994  // Create the Map to return.
1995  if (newComm.is_null () || newComm->getSize () < 1) {
1996  return Teuchos::null; // my process does not participate in the new Map
1997  }
1998  else if (newComm->getSize () == 1) {
1999  lazyPushToHost();
2000 
2001  // The case where the new communicator has only one process is
2002  // easy. We don't have to communicate to get all the
2003  // information we need. Use the default comm to create the new
2004  // Map, then fill in all the fields directly.
2005  RCP<map_type> newMap (new map_type ());
2006 
2007  newMap->comm_ = newComm;
2008  // mfh 07 Oct 2016: Preserve original behavior, even though the
2009  // original index base may no longer be the globally min global
2010  // index. See #616 for why this doesn't matter so much anymore.
2011  newMap->indexBase_ = this->indexBase_;
2012  newMap->numGlobalElements_ = this->numLocalElements_;
2013  newMap->numLocalElements_ = this->numLocalElements_;
2014  newMap->minMyGID_ = this->minMyGID_;
2015  newMap->maxMyGID_ = this->maxMyGID_;
2016  newMap->minAllGID_ = this->minMyGID_;
2017  newMap->maxAllGID_ = this->maxMyGID_;
2018  newMap->firstContiguousGID_ = this->firstContiguousGID_;
2019  newMap->lastContiguousGID_ = this->lastContiguousGID_;
2020  // Since the new communicator has only one process, neither
2021  // uniformity nor contiguity have changed.
2022  newMap->uniform_ = this->uniform_;
2023  newMap->contiguous_ = this->contiguous_;
2024  // The new communicator only has one process, so the new Map is
2025  // not distributed.
2026  newMap->distributed_ = false;
2027  newMap->lgMap_ = this->lgMap_;
2028  newMap->lgMapHost_ = this->lgMapHost_;
2029  newMap->glMap_ = this->glMap_;
2030  newMap->glMapHost_ = this->glMapHost_;
2031  // It's OK not to initialize the new Map's Directory.
2032  // This is initialized lazily, on first call to getRemoteIndexList.
2033 
2034  return newMap;
2035  }
2036  else { // newComm->getSize() != 1
2037  // Even if the original Map is contiguous, the new Map might not
2038  // be, especially if the excluded processes have ranks != 0 or
2039  // newComm->getSize()-1. The common case for this method is to
2040  // exclude many (possibly even all but one) processes, so it
2041  // likely doesn't pay to do the global communication (over the
2042  // original communicator) to figure out whether we can optimize
2043  // the result Map. Thus, we just set up the result Map as
2044  // noncontiguous.
2045  //
2046  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
2047  // the global-to-local table, etc. Optimize this code path to
2048  // avoid unnecessary local work.
2049 
2050  // Make Map (re)compute the global number of elements.
2051  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2052  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
2053  // to use the noncontiguous Map constructor, since the new Map
2054  // might not be contiguous. Even if the old Map was contiguous,
2055  // some process in the "middle" might have been excluded. If we
2056  // want to avoid local work, we either have to do the setup by
2057  // hand, or write a new Map constructor.
2058 #if 1
2059  // The disabled code here throws the following exception in
2060  // Map's replaceCommWithSubset test:
2061  //
2062  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
2063  // 10:
2064  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2065  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2066 
2067  auto lgMap = this->getMyGlobalIndices ();
2068  using size_type =
2069  typename std::decay<decltype (lgMap.extent (0)) >::type;
2070  const size_type lclNumInds =
2071  static_cast<size_type> (this->getLocalNumElements ());
2072  using Teuchos::TypeNameTraits;
2073  TEUCHOS_TEST_FOR_EXCEPTION
2074  (lgMap.extent (0) != lclNumInds, std::logic_error,
2075  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2076  "has length " << lgMap.extent (0) << " (of type " <<
2077  TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2078  " = " << this->getLocalNumElements () << ". The latter, upon being "
2079  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2080  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2081  "developers.");
2082 #else
2083  Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2084 #endif // 1
2085 
2086  const GO indexBase = this->getIndexBase ();
2087  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2088  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2089  return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2090  }
2091  }
2092 
2093  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2094  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2097  {
2098  using Teuchos::Comm;
2099  using Teuchos::null;
2100  using Teuchos::outArg;
2101  using Teuchos::RCP;
2102  using Teuchos::rcp;
2103  using Teuchos::REDUCE_MIN;
2104  using Teuchos::reduceAll;
2105 
2106  // Create the new communicator. split() returns a valid
2107  // communicator on all processes. On processes where color == 0,
2108  // ignore the result. Passing key == 0 tells MPI to order the
2109  // processes in the new communicator by their rank in the old
2110  // communicator.
2111  const int color = (numLocalElements_ == 0) ? 0 : 1;
2112  // MPI_Comm_split must be called collectively over the original
2113  // communicator. We can't just call it on processes with color
2114  // one, even though we will ignore its result on processes with
2115  // color zero.
2116  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2117  if (color == 0) {
2118  newComm = null;
2119  }
2120 
2121  // Create the Map to return.
2122  if (newComm.is_null ()) {
2123  return null; // my process does not participate in the new Map
2124  } else {
2125  RCP<Map> map = rcp (new Map ());
2126 
2127  map->comm_ = newComm;
2128  map->indexBase_ = indexBase_;
2129  map->numGlobalElements_ = numGlobalElements_;
2130  map->numLocalElements_ = numLocalElements_;
2131  map->minMyGID_ = minMyGID_;
2132  map->maxMyGID_ = maxMyGID_;
2133  map->minAllGID_ = minAllGID_;
2134  map->maxAllGID_ = maxAllGID_;
2135  map->firstContiguousGID_= firstContiguousGID_;
2136  map->lastContiguousGID_ = lastContiguousGID_;
2137 
2138  // Uniformity and contiguity have not changed. The directory
2139  // has changed, but we've taken care of that above.
2140  map->uniform_ = uniform_;
2141  map->contiguous_ = contiguous_;
2142 
2143  // If the original Map was NOT distributed, then the new Map
2144  // cannot be distributed.
2145  //
2146  // If the number of processes in the new communicator is 1, then
2147  // the new Map is not distributed.
2148  //
2149  // Otherwise, we have to check the new Map using an all-reduce
2150  // (over the new communicator). For example, the original Map
2151  // may have had some processes with zero elements, and all other
2152  // processes with the same number of elements as in the whole
2153  // Map. That Map is technically distributed, because of the
2154  // processes with zero elements. Removing those processes would
2155  // make the new Map locally replicated.
2156  if (! distributed_ || newComm->getSize () == 1) {
2157  map->distributed_ = false;
2158  } else {
2159  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2160  int allProcsOwnAllGids = 0;
2161  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2162  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2163  }
2164 
2165  map->lgMap_ = lgMap_;
2166  map->lgMapHost_ = lgMapHost_;
2167  map->glMap_ = glMap_;
2168  map->glMapHost_ = glMapHost_;
2169 
2170  // Map's default constructor creates an uninitialized Directory.
2171  // The Directory will be initialized on demand in
2172  // getRemoteIndexList().
2173  //
2174  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2175  // directory more efficiently than just recreating it. If
2176  // directory recreation proves a bottleneck, we can always
2177  // revisit this. On the other hand, Directory creation is only
2178  // collective over the new, presumably much smaller
2179  // communicator, so it may not be worth the effort to optimize.
2180 
2181  return map;
2182  }
2183  }
2184 
2185  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2186  void
2188  {
2189  TEUCHOS_TEST_FOR_EXCEPTION(
2190  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2191  "The Directory is null. "
2192  "Please report this bug to the Tpetra developers.");
2193 
2194  // Only create the Directory if it hasn't been created yet.
2195  // This is a collective operation.
2196  if (! directory_->initialized ()) {
2197  directory_->initialize (*this);
2198  }
2199  }
2200 
2201  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2202  LookupStatus
2204  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2205  const Teuchos::ArrayView<int>& PIDs,
2206  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2207  {
2208  using Tpetra::Details::OrdinalTraits;
2210  using std::endl;
2211  using size_type = Teuchos::ArrayView<int>::size_type;
2212 
2213  const bool verbose = Details::Behavior::verbose("Map");
2214  const size_t maxNumToPrint = verbose ?
2216  std::unique_ptr<std::string> prefix;
2217  if (verbose) {
2218  prefix = Details::createPrefix(comm_.getRawPtr(),
2219  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2220  std::ostringstream os;
2221  os << *prefix << "Start: ";
2222  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2223  os << endl;
2224  std::cerr << os.str();
2225  }
2226 
2227  // Empty Maps (i.e., containing no indices on any processes in the
2228  // Map's communicator) are perfectly valid. In that case, if the
2229  // input GID list is nonempty, we fill the output arrays with
2230  // invalid values, and return IDNotPresent to notify the caller.
2231  // It's perfectly valid to give getRemoteIndexList GIDs that the
2232  // Map doesn't own. SubmapImport test 2 needs this functionality.
2233  if (getGlobalNumElements () == 0) {
2234  if (GIDs.size () == 0) {
2235  if (verbose) {
2236  std::ostringstream os;
2237  os << *prefix << "Done; both Map & input are empty" << endl;
2238  std::cerr << os.str();
2239  }
2240  return AllIDsPresent; // trivially
2241  }
2242  else {
2243  if (verbose) {
2244  std::ostringstream os;
2245  os << *prefix << "Done: Map is empty on all processes, "
2246  "so all output PIDs & LIDs are invalid (-1)." << endl;
2247  std::cerr << os.str();
2248  }
2249  for (size_type k = 0; k < PIDs.size (); ++k) {
2250  PIDs[k] = OrdinalTraits<int>::invalid ();
2251  }
2252  for (size_type k = 0; k < LIDs.size (); ++k) {
2253  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2254  }
2255  return IDNotPresent;
2256  }
2257  }
2258 
2259  // getRemoteIndexList must be called collectively, and Directory
2260  // initialization is collective too, so it's OK to initialize the
2261  // Directory on demand.
2262 
2263  if (verbose) {
2264  std::ostringstream os;
2265  os << *prefix << "Call setupDirectory" << endl;
2266  std::cerr << os.str();
2267  }
2268  setupDirectory();
2269  if (verbose) {
2270  std::ostringstream os;
2271  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2272  std::cerr << os.str();
2273  }
2274  const Tpetra::LookupStatus retVal =
2275  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2276  if (verbose) {
2277  std::ostringstream os;
2278  os << *prefix << "Done; getDirectoryEntries returned "
2279  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2280  << "; ";
2281  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2282  os << ", ";
2283  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2284  os << endl;
2285  std::cerr << os.str();
2286  }
2287  return retVal;
2288  }
2289 
2290  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2291  LookupStatus
2293  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2294  const Teuchos::ArrayView<int> & PIDs) const
2295  {
2297  using std::endl;
2298 
2299  const bool verbose = Details::Behavior::verbose("Map");
2300  const size_t maxNumToPrint = verbose ?
2302  std::unique_ptr<std::string> prefix;
2303  if (verbose) {
2304  prefix = Details::createPrefix(comm_.getRawPtr(),
2305  "Map", "getRemoteIndexList(GIDs,PIDs)");
2306  std::ostringstream os;
2307  os << *prefix << "Start: ";
2308  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2309  os << endl;
2310  std::cerr << os.str();
2311  }
2312 
2313  if (getGlobalNumElements () == 0) {
2314  if (GIDs.size () == 0) {
2315  if (verbose) {
2316  std::ostringstream os;
2317  os << *prefix << "Done; both Map & input are empty" << endl;
2318  std::cerr << os.str();
2319  }
2320  return AllIDsPresent; // trivially
2321  }
2322  else {
2323  if (verbose) {
2324  std::ostringstream os;
2325  os << *prefix << "Done: Map is empty on all processes, "
2326  "so all output PIDs are invalid (-1)." << endl;
2327  std::cerr << os.str();
2328  }
2329  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2330  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2331  }
2332  return IDNotPresent;
2333  }
2334  }
2335 
2336  // getRemoteIndexList must be called collectively, and Directory
2337  // initialization is collective too, so it's OK to initialize the
2338  // Directory on demand.
2339 
2340  if (verbose) {
2341  std::ostringstream os;
2342  os << *prefix << "Call setupDirectory" << endl;
2343  std::cerr << os.str();
2344  }
2345  setupDirectory();
2346  if (verbose) {
2347  std::ostringstream os;
2348  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2349  std::cerr << os.str();
2350  }
2351  const Tpetra::LookupStatus retVal =
2352  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2353  if (verbose) {
2354  std::ostringstream os;
2355  os << *prefix << "Done; getDirectoryEntries returned "
2356  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2357  << "; ";
2358  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2359  os << endl;
2360  std::cerr << os.str();
2361  }
2362  return retVal;
2363  }
2364 
2365  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2366  void
2368  using exec_space = typename Node::device_type::execution_space;
2369  if(lgMap_.extent(0) != lgMapHost_.extent(0)) {
2370  Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2371  // NOTE: We check lgMap_ and not glMap_, since the latter can
2372  // be somewhat error prone for contiguous maps
2373 
2374  // create_mirror_view preserves const-ness. create_mirror does not
2375  auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace (), lgMap_);
2376 
2377  // Since this was computed on the default stream, we can copy on the stream and then fence
2378  // the stream
2379  Kokkos::deep_copy(exec_space(),lgMap_host,lgMap_);
2380  exec_space().fence();
2381  lgMapHost_ = lgMap_host;
2382 
2383  // Make host version - when memory spaces match these just do trivial assignment
2384  glMapHost_ = global_to_local_table_host_type(glMap_);
2385  }
2386  }
2387 
2388 
2389  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2390  Teuchos::RCP<const Teuchos::Comm<int> >
2392  return comm_;
2393  }
2394 
2395  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2396  bool
2398  checkIsDist() const
2399  {
2400  using Teuchos::as;
2401  using Teuchos::outArg;
2402  using Teuchos::REDUCE_MIN;
2403  using Teuchos::reduceAll;
2404  using std::endl;
2405 
2406  const bool verbose = Details::Behavior::verbose("Map");
2407  std::unique_ptr<std::string> prefix;
2408  if (verbose) {
2409  prefix = Details::createPrefix(
2410  comm_.getRawPtr(), "Map", "checkIsDist");
2411  std::ostringstream os;
2412  os << *prefix << "Start" << endl;
2413  std::cerr << os.str();
2414  }
2415 
2416  bool global = false;
2417  if (comm_->getSize () > 1) {
2418  // The communicator has more than one process, but that doesn't
2419  // necessarily mean the Map is distributed.
2420  int localRep = 0;
2421  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2422  // The number of local elements on this process equals the
2423  // number of global elements.
2424  //
2425  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2426  // duplicates in the global ID list on input (the third Map
2427  // constructor), so that the number of local elements (which
2428  // are not duplicated) on this process could be less than the
2429  // number of global elements, even if this process owns all
2430  // the elements?
2431  localRep = 1;
2432  }
2433  int allLocalRep;
2434  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2435  if (allLocalRep != 1) {
2436  // At least one process does not own all the elements.
2437  // This makes the Map a distributed Map.
2438  global = true;
2439  }
2440  }
2441  // If the communicator has only one process, then the Map is not
2442  // distributed.
2443 
2444  if (verbose) {
2445  std::ostringstream os;
2446  os << *prefix << "Done; global=" << (global ? "true" : "false")
2447  << endl;
2448  std::cerr << os.str();
2449  }
2450  return global;
2451  }
2452 
2453 } // namespace Tpetra
2454 
2455 template <class LocalOrdinal, class GlobalOrdinal>
2456 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2457 Tpetra::createLocalMap (const size_t numElements,
2458  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2459 {
2460  typedef LocalOrdinal LO;
2461  typedef GlobalOrdinal GO;
2462  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2463  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2464 }
2465 
2466 template <class LocalOrdinal, class GlobalOrdinal>
2467 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2468 Tpetra::createUniformContigMap (const global_size_t numElements,
2469  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2470 {
2471  typedef LocalOrdinal LO;
2472  typedef GlobalOrdinal GO;
2473  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2474  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2475 }
2476 
2477 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2478 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2479 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2480  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2481 )
2482 {
2483  using Teuchos::rcp;
2485  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2486 
2487  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2488 }
2489 
2490 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2491 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2492 Tpetra::createLocalMapWithNode (const size_t numElements,
2493  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2494 )
2495 {
2496  using Tpetra::global_size_t;
2497  using Teuchos::rcp;
2499  const GlobalOrdinal indexBase = 0;
2500  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2501 
2502  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2503 }
2504 
2505 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2506 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2508  const size_t localNumElements,
2509  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2510 )
2511 {
2512  using Teuchos::rcp;
2514  const GlobalOrdinal indexBase = 0;
2515 
2516  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2517 }
2518 
2519 template <class LocalOrdinal, class GlobalOrdinal>
2520 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2522  const size_t localNumElements,
2523  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2524 {
2525  typedef LocalOrdinal LO;
2526  typedef GlobalOrdinal GO;
2527  using NT = typename Tpetra::Map<LO, GO>::node_type;
2528 
2529  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2530 }
2531 
2532 template <class LocalOrdinal, class GlobalOrdinal>
2533 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2534 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2535  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2536 {
2537  typedef LocalOrdinal LO;
2538  typedef GlobalOrdinal GO;
2539  using NT = typename Tpetra::Map<LO, GO>::node_type;
2540 
2541  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2542 }
2543 
2544 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2545 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2546 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2547  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2548 )
2549 {
2550  using Teuchos::rcp;
2552  using GST = Tpetra::global_size_t;
2553  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2554  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2555  // shouldn't be zero, given that the index base is supposed to equal
2556  // the globally min global index?
2557  const GlobalOrdinal indexBase = 0;
2558 
2559  return rcp (new map_type (INV, elementList, indexBase, comm));
2560 }
2561 
2562 template<class LO, class GO, class NT>
2563 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2564 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2565 {
2567  using Teuchos::Array;
2568  using Teuchos::ArrayView;
2569  using Teuchos::as;
2570  using Teuchos::rcp;
2571  using std::cerr;
2572  using std::endl;
2573  using map_type = Tpetra::Map<LO, GO, NT>;
2574  using GST = global_size_t;
2575 
2576  const bool verbose = Details::Behavior::verbose("Map");
2577  std::unique_ptr<std::string> prefix;
2578  if (verbose) {
2579  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2580  prefix = Details::createPrefix(
2581  comm.getRawPtr(), "createOneToOne(Map)");
2582  std::ostringstream os;
2583  os << *prefix << "Start" << endl;
2584  cerr << os.str();
2585  }
2586  const size_t maxNumToPrint = verbose ?
2587  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2588  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2589  const int myRank = M->getComm ()->getRank ();
2590 
2591  // Bypasses for special cases where either M is known to be
2592  // one-to-one, or the one-to-one version of M is easy to compute.
2593  // This is why we take M as an RCP, not as a const reference -- so
2594  // that we can return M itself if it is 1-to-1.
2595  if (! M->isDistributed ()) {
2596  // For a locally replicated Map, we assume that users want to push
2597  // all the GIDs to Process 0.
2598 
2599  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2600  // you think it should return, in this special case of a locally
2601  // replicated contiguous Map.
2602  const GST numGlobalEntries = M->getGlobalNumElements ();
2603  if (M->isContiguous()) {
2604  const size_t numLocalEntries =
2605  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2606  if (verbose) {
2607  std::ostringstream os;
2608  os << *prefix << "Input is locally replicated & contiguous; "
2609  "numLocalEntries=" << numLocalEntries << endl;
2610  cerr << os.str ();
2611  }
2612  auto retMap =
2613  rcp(new map_type(numGlobalEntries, numLocalEntries,
2614  M->getIndexBase(), M->getComm()));
2615  if (verbose) {
2616  std::ostringstream os;
2617  os << *prefix << "Done" << endl;
2618  cerr << os.str ();
2619  }
2620  return retMap;
2621  }
2622  else {
2623  if (verbose) {
2624  std::ostringstream os;
2625  os << *prefix << "Input is locally replicated & noncontiguous"
2626  << endl;
2627  cerr << os.str ();
2628  }
2629  ArrayView<const GO> myGids =
2630  (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2631  auto retMap =
2632  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2633  M->getComm()));
2634  if (verbose) {
2635  std::ostringstream os;
2636  os << *prefix << "Done" << endl;
2637  cerr << os.str ();
2638  }
2639  return retMap;
2640  }
2641  }
2642  else if (M->isContiguous ()) {
2643  if (verbose) {
2644  std::ostringstream os;
2645  os << *prefix << "Input is distributed & contiguous" << endl;
2646  cerr << os.str ();
2647  }
2648  // Contiguous, distributed Maps are one-to-one by construction.
2649  // (Locally replicated Maps can be contiguous.)
2650  return M;
2651  }
2652  else {
2653  if (verbose) {
2654  std::ostringstream os;
2655  os << *prefix << "Input is distributed & noncontiguous" << endl;
2656  cerr << os.str ();
2657  }
2659  const size_t numMyElems = M->getLocalNumElements ();
2660  ArrayView<const GO> myElems = M->getLocalElementList ();
2661  Array<int> owner_procs_vec (numMyElems);
2662 
2663  if (verbose) {
2664  std::ostringstream os;
2665  os << *prefix << "Call Directory::getDirectoryEntries: ";
2666  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2667  os << endl;
2668  cerr << os.str();
2669  }
2670  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2671  if (verbose) {
2672  std::ostringstream os;
2673  os << *prefix << "getDirectoryEntries result: ";
2674  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2675  os << endl;
2676  cerr << os.str();
2677  }
2678 
2679  Array<GO> myOwned_vec (numMyElems);
2680  size_t numMyOwnedElems = 0;
2681  for (size_t i = 0; i < numMyElems; ++i) {
2682  const GO GID = myElems[i];
2683  const int owner = owner_procs_vec[i];
2684 
2685  if (myRank == owner) {
2686  myOwned_vec[numMyOwnedElems++] = GID;
2687  }
2688  }
2689  myOwned_vec.resize (numMyOwnedElems);
2690 
2691  if (verbose) {
2692  std::ostringstream os;
2693  os << *prefix << "Create Map: ";
2694  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2695  os << endl;
2696  cerr << os.str();
2697  }
2698  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2699  M->getIndexBase(), M->getComm()));
2700  if (verbose) {
2701  std::ostringstream os;
2702  os << *prefix << "Done" << endl;
2703  cerr << os.str();
2704  }
2705  return retMap;
2706  }
2707 }
2708 
2709 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2710 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2713 {
2714  using Details::Behavior;
2716  using Teuchos::Array;
2717  using Teuchos::ArrayView;
2718  using Teuchos::RCP;
2719  using Teuchos::rcp;
2720  using Teuchos::toString;
2721  using std::cerr;
2722  using std::endl;
2723  using LO = LocalOrdinal;
2724  using GO = GlobalOrdinal;
2725  using map_type = Tpetra::Map<LO, GO, Node>;
2726 
2727  const bool verbose = Behavior::verbose("Map");
2728  std::unique_ptr<std::string> prefix;
2729  if (verbose) {
2730  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2731  prefix = Details::createPrefix(
2732  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2733  std::ostringstream os;
2734  os << *prefix << "Start" << endl;
2735  cerr << os.str();
2736  }
2737  const size_t maxNumToPrint = verbose ?
2738  Behavior::verbosePrintCountThreshold() : size_t(0);
2739 
2740  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2741  // Maps (which are 1-to-1 by construction).
2742 
2744  if (verbose) {
2745  std::ostringstream os;
2746  os << *prefix << "Initialize Directory" << endl;
2747  cerr << os.str ();
2748  }
2749  directory.initialize (*M, tie_break);
2750  if (verbose) {
2751  std::ostringstream os;
2752  os << *prefix << "Done initializing Directory" << endl;
2753  cerr << os.str ();
2754  }
2755  size_t numMyElems = M->getLocalNumElements ();
2756  ArrayView<const GO> myElems = M->getLocalElementList ();
2757  Array<int> owner_procs_vec (numMyElems);
2758  if (verbose) {
2759  std::ostringstream os;
2760  os << *prefix << "Call Directory::getDirectoryEntries: ";
2761  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2762  os << endl;
2763  cerr << os.str();
2764  }
2765  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2766  if (verbose) {
2767  std::ostringstream os;
2768  os << *prefix << "getDirectoryEntries result: ";
2769  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2770  os << endl;
2771  cerr << os.str();
2772  }
2773 
2774  const int myRank = M->getComm()->getRank();
2775  Array<GO> myOwned_vec (numMyElems);
2776  size_t numMyOwnedElems = 0;
2777  for (size_t i = 0; i < numMyElems; ++i) {
2778  const GO GID = myElems[i];
2779  const int owner = owner_procs_vec[i];
2780  if (myRank == owner) {
2781  myOwned_vec[numMyOwnedElems++] = GID;
2782  }
2783  }
2784  myOwned_vec.resize (numMyOwnedElems);
2785 
2786  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2787  // valid for the new Map. Why can't we reuse it?
2788  const global_size_t GINV =
2789  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2790  if (verbose) {
2791  std::ostringstream os;
2792  os << *prefix << "Create Map: ";
2793  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2794  os << endl;
2795  cerr << os.str();
2796  }
2797  RCP<const map_type> retMap
2798  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2799  M->getComm ()));
2800  if (verbose) {
2801  std::ostringstream os;
2802  os << *prefix << "Done" << endl;
2803  cerr << os.str();
2804  }
2805  return retMap;
2806 }
2807 
2808 //
2809 // Explicit instantiation macro
2810 //
2811 // Must be expanded from within the Tpetra namespace!
2812 //
2813 
2815 
2816 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2817  \
2818  template class Map< LO , GO , NODE >; \
2819  \
2820  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2821  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2822  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2823  \
2824  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2825  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2826  const size_t localNumElements, \
2827  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2828  \
2829  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2830  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2831  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2832  \
2833  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2834  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2835  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2836  \
2837  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2838  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2839  \
2840  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2841  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2842  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2843 
2844 
2846 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2847  template Teuchos::RCP< const Map<LO,GO> > \
2848  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2849  \
2850  template Teuchos::RCP< const Map<LO,GO> > \
2851  createContigMap<LO,GO>( global_size_t, size_t, \
2852  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2853  \
2854  template Teuchos::RCP< const Map<LO,GO> > \
2855  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2856  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2857  \
2858  template Teuchos::RCP< const Map<LO,GO> > \
2859  createUniformContigMap<LO,GO>( const global_size_t, \
2860  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2861 
2862 #endif // TPETRA_MAP_DEF_HPP
Interface for breaking ties in ownership.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:64
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
LocalOrdinal local_ordinal_type
The type of local indices.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
GlobalOrdinal global_ordinal_type
The type of global indices.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
Declaration of Tpetra::Details::printOnce.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declaration of a function that prints strings from each process.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map&#39;s device.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map&#39;s communicator with a subset communicator.
Functions for initializing and finalizing Tpetra.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
static bool verbose()
Whether Tpetra is in verbose mode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
typename device_type::execution_space execution_space
The Kokkos execution space.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
A parallel distribution of indices over processes.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
bool isOneToOne() const
Whether the Map is one to one.
void initialize(const map_type &map)
Initialize the Directory with its Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
typename Node::device_type device_type
This class&#39; Kokkos::Device specialization.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
LocalGlobal
Enum for local versus global allocation of Map entries.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
Node node_type
Legacy typedef that will go away at some point.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.
std::string description() const
Implementation of Teuchos::Describable.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.
Map()
Default constructor (that does nothing).