Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
44 
45 #ifndef TPETRA_MAP_DEF_HPP
46 #define TPETRA_MAP_DEF_HPP
47 
48 #include <memory>
49 #include <sstream>
50 #include <stdexcept>
51 #include <typeinfo>
52 
53 #include "Teuchos_as.hpp"
54 #include "Teuchos_TypeNameTraits.hpp"
55 #include "Teuchos_CommHelpers.hpp"
56 
57 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
59 #include "Tpetra_Details_FixedHashTable.hpp"
62 #include "Tpetra_Core.hpp"
63 #include "Tpetra_Util.hpp"
64 #include "Tpetra_Details_mpiIsInitialized.hpp"
65 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
68 
69 namespace { // (anonymous)
70 
71  void
72  checkMapInputArray (const char ctorName[],
73  const void* indexList,
74  const size_t indexListSize,
75  const Teuchos::Comm<int>* const comm)
76  {
78 
79  const bool debug = Behavior::debug("Map");
80  if (debug) {
81  using Teuchos::outArg;
82  using Teuchos::REDUCE_MIN;
83  using Teuchos::reduceAll;
84  using std::endl;
85 
86  const int myRank = comm == nullptr ? 0 : comm->getRank ();
87  const bool verbose = Behavior::verbose("Map");
88  std::ostringstream lclErrStrm;
89  int lclSuccess = 1;
90 
91  if (indexListSize != 0 && indexList == nullptr) {
92  lclSuccess = 0;
93  if (verbose) {
94  lclErrStrm << "Proc " << myRank << ": indexList is null, "
95  "but indexListSize=" << indexListSize << " != 0." << endl;
96  }
97  }
98  int gblSuccess = 0; // output argument
99  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
100  if (gblSuccess != 1) {
101  std::ostringstream gblErrStrm;
102  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
103  " detected a problem with the input array "
104  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
105  "of global indices." << endl;
106  if (verbose) {
107  using ::Tpetra::Details::gathervPrint;
108  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
109  }
110  TEUCHOS_TEST_FOR_EXCEPTION
111  (true, std::invalid_argument, gblErrStrm.str ());
112  }
113  }
114  }
115 
116 
117 
118 
119  template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
120  void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal & minMyGID, GlobalOrdinal & maxMyGID, GlobalOrdinal &firstContiguousGID, GlobalOrdinal &lastContiguousGID_val, LocalOrdinal &lastContiguousGID_loc) {
121  using LO = LocalOrdinal;
122  using GO = GlobalOrdinal;
123  using exec_space = typename ViewType::device_type::execution_space;
124  using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO> >;
125  const LO numLocalElements = entryList.extent(0);
126 
127  // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
128  // ride, rather than the other way around
129  typedef typename Kokkos::MinLoc<LO,GO>::value_type minloc_type;
130  minloc_type myMinLoc;
131 
132  // Find the initial sequence of parallel gids
133  // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
134  // We want the one *before* that guy.
135  Kokkos::parallel_reduce(range_policy(0,numLocalElements),KOKKOS_LAMBDA(const LO & i, GO &l_myMin, GO&l_myMax, GO& l_firstCont, minloc_type & l_lastCont){
136  GO entry_0 = entryList[0];
137  GO entry_i = entryList[i];
138 
139  // Easy stuff
140  l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
141  l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
142  l_firstCont = entry_0;
143 
144  if(entry_i - entry_0 != i && l_lastCont.val >= i) {
145  // We're non-contiguous, so the guy before us could be the last contiguous guy
146  l_lastCont.val = i-1;
147  l_lastCont.loc = entryList[i-1];
148  }
149  else if (i == numLocalElements-1 && i < l_lastCont.val) {
150  // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
151  l_lastCont.val = i;
152  l_lastCont.loc = entry_i;
153  }
154 
155  },Kokkos::Min<GO>(minMyGID),Kokkos::Max<GO>(maxMyGID),Kokkos::Min<GO>(firstContiguousGID),Kokkos::MinLoc<LO,GO>(myMinLoc));
156 
157  // This switch is intentional, since we're using MinLoc backwards
158  lastContiguousGID_val = myMinLoc.loc;
159  lastContiguousGID_loc = myMinLoc.val;
160  }
161 
162 
163 } // namespace (anonymous)
164 
165 namespace Tpetra {
166 
167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
169  Map () :
170  comm_ (new Teuchos::SerialComm<int> ()),
171  indexBase_ (0),
172  numGlobalElements_ (0),
173  numLocalElements_ (0),
174  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
175  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
176  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
177  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
178  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
179  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
180  uniform_ (false), // trivially
181  contiguous_ (false),
182  distributed_ (false), // no communicator yet
183  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
184  {
186  }
187 
188 
189  template <class LocalOrdinal, class GlobalOrdinal, class Node>
191  Map (const global_size_t numGlobalElements,
192  const global_ordinal_type indexBase,
193  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
194  const LocalGlobal lOrG) :
195  comm_ (comm),
196  uniform_ (true),
197  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
198  {
199  using Teuchos::as;
200  using Teuchos::broadcast;
201  using Teuchos::outArg;
202  using Teuchos::reduceAll;
203  using Teuchos::REDUCE_MIN;
204  using Teuchos::REDUCE_MAX;
205  using Teuchos::typeName;
206  using std::endl;
207  using GO = global_ordinal_type;
208  using GST = global_size_t;
209  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
210  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
211  const char exPfx[] =
212  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
213 
214  const bool debug = Details::Behavior::debug("Map");
215  const bool verbose = Details::Behavior::verbose("Map");
216  std::unique_ptr<std::string> prefix;
217  if (verbose) {
218  prefix = Details::createPrefix(
219  comm_.getRawPtr(), "Map", funcName);
220  std::ostringstream os;
221  os << *prefix << "Start" << endl;
222  std::cerr << os.str();
223  }
225 
226  // In debug mode only, check whether numGlobalElements and
227  // indexBase are the same over all processes in the communicator.
228  if (debug) {
229  GST proc0NumGlobalElements = numGlobalElements;
230  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
231  GST minNumGlobalElements = numGlobalElements;
232  GST maxNumGlobalElements = numGlobalElements;
233  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
234  outArg(minNumGlobalElements));
235  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
236  outArg(maxNumGlobalElements));
237  TEUCHOS_TEST_FOR_EXCEPTION
238  (minNumGlobalElements != maxNumGlobalElements ||
239  numGlobalElements != minNumGlobalElements,
240  std::invalid_argument, exPfx << "All processes must "
241  "provide the same number of global elements. Process 0 set "
242  "numGlobalElements="<< proc0NumGlobalElements << ". The "
243  "calling process " << comm->getRank() << " set "
244  "numGlobalElements=" << numGlobalElements << ". The min "
245  "and max values over all processes are "
246  << minNumGlobalElements << " resp. " << maxNumGlobalElements
247  << ".");
248 
249  GO proc0IndexBase = indexBase;
250  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
251  GO minIndexBase = indexBase;
252  GO maxIndexBase = indexBase;
253  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
254  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
255  TEUCHOS_TEST_FOR_EXCEPTION
256  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
257  std::invalid_argument, exPfx << "All processes must "
258  "provide the same indexBase argument. Process 0 set "
259  "indexBase=" << proc0IndexBase << ". The calling process "
260  << comm->getRank() << " set indexBase=" << indexBase
261  << ". The min and max values over all processes are "
262  << minIndexBase << " resp. " << maxIndexBase << ".");
263  }
264 
265  // Distribute the elements across the processes in the given
266  // communicator so that global IDs (GIDs) are
267  //
268  // - Nonoverlapping (only one process owns each GID)
269  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
270  // adjacent GIDs differ by more than one)
271  // - As evenly distributed as possible (the numbers of GIDs on two
272  // different processes do not differ by more than one)
273 
274  // All processes have the same numGlobalElements, but we still
275  // need to check that it is valid. numGlobalElements must be
276  // positive and not the "invalid" value (GSTI).
277  //
278  // This comparison looks funny, but it avoids compiler warnings
279  // for comparing unsigned integers (numGlobalElements_in is a
280  // GST, which is unsigned) while still working if we
281  // later decide to make GST signed.
282  TEUCHOS_TEST_FOR_EXCEPTION(
283  (numGlobalElements < 1 && numGlobalElements != 0),
284  std::invalid_argument, exPfx << "numGlobalElements (= "
285  << numGlobalElements << ") must be nonnegative.");
286 
287  TEUCHOS_TEST_FOR_EXCEPTION
288  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
289  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
290  "Tpetra::global_size_t>::invalid(). This version of the "
291  "constructor requires a valid value of numGlobalElements. "
292  "You probably mistook this constructor for the \"contiguous "
293  "nonuniform\" constructor, which can compute the global "
294  "number of elements for you if you set numGlobalElements to "
295  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
296 
297  size_t numLocalElements = 0; // will set below
298  if (lOrG == GloballyDistributed) {
299  // Compute numLocalElements:
300  //
301  // If numGlobalElements == numProcs * B + remainder,
302  // then Proc r gets B+1 elements if r < remainder,
303  // and B elements if r >= remainder.
304  //
305  // This strategy is valid for any value of numGlobalElements and
306  // numProcs, including the following border cases:
307  // - numProcs == 1
308  // - numLocalElements < numProcs
309  //
310  // In the former case, remainder == 0 && numGlobalElements ==
311  // numLocalElements. In the latter case, remainder ==
312  // numGlobalElements && numLocalElements is either 0 or 1.
313  const GST numProcs = static_cast<GST> (comm_->getSize ());
314  const GST myRank = static_cast<GST> (comm_->getRank ());
315  const GST quotient = numGlobalElements / numProcs;
316  const GST remainder = numGlobalElements - quotient * numProcs;
317 
318  GO startIndex;
319  if (myRank < remainder) {
320  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
321  // myRank was originally an int, so it should never overflow
322  // reasonable GO types.
323  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
324  } else {
325  numLocalElements = as<size_t> (quotient);
326  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
327  as<GO> (remainder);
328  }
329 
330  minMyGID_ = indexBase + startIndex;
331  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
332  minAllGID_ = indexBase;
333  maxAllGID_ = indexBase + numGlobalElements - 1;
334  distributed_ = (numProcs > 1);
335  }
336  else { // lOrG == LocallyReplicated
337  numLocalElements = as<size_t> (numGlobalElements);
338  minMyGID_ = indexBase;
339  maxMyGID_ = indexBase + numGlobalElements - 1;
340  distributed_ = false;
341  }
342 
343  minAllGID_ = indexBase;
344  maxAllGID_ = indexBase + numGlobalElements - 1;
345  indexBase_ = indexBase;
346  numGlobalElements_ = numGlobalElements;
347  numLocalElements_ = numLocalElements;
348  firstContiguousGID_ = minMyGID_;
349  lastContiguousGID_ = maxMyGID_;
350  contiguous_ = true;
351 
352  // Create the Directory on demand in getRemoteIndexList().
353  //setupDirectory ();
354 
355  if (verbose) {
356  std::ostringstream os;
357  os << *prefix << "Done" << endl;
358  std::cerr << os.str();
359  }
360  }
361 
362 
363  template <class LocalOrdinal, class GlobalOrdinal, class Node>
365  Map (const global_size_t numGlobalElements,
366  const size_t numLocalElements,
367  const global_ordinal_type indexBase,
368  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
369  comm_ (comm),
370  uniform_ (false),
371  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
372  {
373  using Teuchos::as;
374  using Teuchos::broadcast;
375  using Teuchos::outArg;
376  using Teuchos::reduceAll;
377  using Teuchos::REDUCE_MIN;
378  using Teuchos::REDUCE_MAX;
379  using Teuchos::REDUCE_SUM;
380  using Teuchos::scan;
381  using std::endl;
382  using GO = global_ordinal_type;
383  using GST = global_size_t;
384  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
385  const char funcName[] =
386  "Map(gblNumInds,lclNumInds,indexBase,comm)";
387  const char exPfx[] =
388  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
389  const char suffix[] =
390  ". Please report this bug to the Tpetra developers.";
391 
392  const bool debug = Details::Behavior::debug("Map");
393  const bool verbose = Details::Behavior::verbose("Map");
394  std::unique_ptr<std::string> prefix;
395  if (verbose) {
396  prefix = Details::createPrefix(
397  comm_.getRawPtr(), "Map", funcName);
398  std::ostringstream os;
399  os << *prefix << "Start" << endl;
400  std::cerr << os.str();
401  }
403 
404  // Global sum of numLocalElements over all processes.
405  // Keep this for later debug checks.
406  GST debugGlobalSum {};
407  if (debug) {
408  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
409  numGlobalElements, numLocalElements, indexBase, comm);
410  }
411 
412  // Distribute the elements across the nodes so that they are
413  // - non-overlapping
414  // - contiguous
415 
416  // This differs from the first Map constructor (that only takes a
417  // global number of elements) in that the user has specified the
418  // number of local elements, so that the elements are not
419  // (necessarily) evenly distributed over the processes.
420 
421  // Compute my local offset. This is an inclusive scan, so to get
422  // the final offset, we subtract off the input.
423  GO scanResult = 0;
424  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
425  const GO myOffset = scanResult - numLocalElements;
426 
427  if (numGlobalElements != GSTI) {
428  numGlobalElements_ = numGlobalElements; // Use the user's value.
429  }
430  else {
431  // Inclusive scan means that the last process has the final sum.
432  // Rather than doing a reduceAll to get the sum of
433  // numLocalElements, we can just have the last process broadcast
434  // its result. That saves us a round of log(numProcs) messages.
435  const int numProcs = comm->getSize ();
436  GST globalSum = scanResult;
437  if (numProcs > 1) {
438  broadcast (*comm, numProcs - 1, outArg (globalSum));
439  }
440  numGlobalElements_ = globalSum;
441 
442  if (debug) {
443  // No need for an all-reduce here; both come from collectives.
444  TEUCHOS_TEST_FOR_EXCEPTION
445  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
446  "globalSum = " << globalSum << " != debugGlobalSum = " <<
447  debugGlobalSum << suffix);
448  }
449  }
450  numLocalElements_ = numLocalElements;
451  indexBase_ = indexBase;
452  minAllGID_ = (numGlobalElements_ == 0) ?
453  std::numeric_limits<GO>::max () :
454  indexBase;
455  maxAllGID_ = (numGlobalElements_ == 0) ?
456  std::numeric_limits<GO>::lowest () :
457  indexBase + GO(numGlobalElements_) - GO(1);
458  minMyGID_ = (numLocalElements_ == 0) ?
459  std::numeric_limits<GO>::max () :
460  indexBase + GO(myOffset);
461  maxMyGID_ = (numLocalElements_ == 0) ?
462  std::numeric_limits<GO>::lowest () :
463  indexBase + myOffset + GO(numLocalElements) - GO(1);
464  firstContiguousGID_ = minMyGID_;
465  lastContiguousGID_ = maxMyGID_;
466  contiguous_ = true;
467  distributed_ = checkIsDist ();
468 
469  // Create the Directory on demand in getRemoteIndexList().
470  //setupDirectory ();
471 
472  if (verbose) {
473  std::ostringstream os;
474  os << *prefix << "Done" << endl;
475  std::cerr << os.str();
476  }
477  }
478 
479  template <class LocalOrdinal, class GlobalOrdinal, class Node>
483  const char errorMessagePrefix[],
484  const global_size_t numGlobalElements,
485  const size_t numLocalElements,
486  const global_ordinal_type indexBase,
487  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
488  {
489  const bool debug = Details::Behavior::debug("Map");
490  if (! debug) {
491  return global_size_t(0);
492  }
493 
494  using Teuchos::broadcast;
495  using Teuchos::outArg;
496  using Teuchos::ptr;
497  using Teuchos::REDUCE_MAX;
498  using Teuchos::REDUCE_MIN;
499  using Teuchos::REDUCE_SUM;
500  using Teuchos::reduceAll;
501  using GO = global_ordinal_type;
502  using GST = global_size_t;
503  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
504 
505  // The user has specified the distribution of indices over the
506  // processes. The distribution is not necessarily contiguous or
507  // equally shared over the processes.
508  //
509  // We assume that the number of local elements can be stored in a
510  // size_t. The instance member numLocalElements_ is a size_t, so
511  // this variable and that should have the same type.
512 
513  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
514  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
515  outArg (debugGlobalSum));
516  // In debug mode only, check whether numGlobalElements and
517  // indexBase are the same over all processes in the communicator.
518  {
519  GST proc0NumGlobalElements = numGlobalElements;
520  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
521  GST minNumGlobalElements = numGlobalElements;
522  GST maxNumGlobalElements = numGlobalElements;
523  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
524  outArg (minNumGlobalElements));
525  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
526  outArg (maxNumGlobalElements));
527  TEUCHOS_TEST_FOR_EXCEPTION
528  (minNumGlobalElements != maxNumGlobalElements ||
529  numGlobalElements != minNumGlobalElements,
530  std::invalid_argument, errorMessagePrefix << "All processes "
531  "must provide the same number of global elements, even if "
532  "that argument is "
533  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
534  "(which signals that the Map should compute the global "
535  "number of elements). Process 0 set numGlobalElements"
536  "=" << proc0NumGlobalElements << ". The calling process "
537  << comm->getRank() << " set numGlobalElements=" <<
538  numGlobalElements << ". The min and max values over all "
539  "processes are " << minNumGlobalElements << " resp. " <<
540  maxNumGlobalElements << ".");
541 
542  GO proc0IndexBase = indexBase;
543  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
544  GO minIndexBase = indexBase;
545  GO maxIndexBase = indexBase;
546  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
547  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
548  TEUCHOS_TEST_FOR_EXCEPTION
549  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
550  std::invalid_argument, errorMessagePrefix <<
551  "All processes must provide the same indexBase argument. "
552  "Process 0 set indexBase = " << proc0IndexBase << ". The "
553  "calling process " << comm->getRank() << " set indexBase="
554  << indexBase << ". The min and max values over all "
555  "processes are " << minIndexBase << " resp. " << maxIndexBase
556  << ".");
557 
558  // Make sure that the sum of numLocalElements over all processes
559  // equals numGlobalElements.
560  TEUCHOS_TEST_FOR_EXCEPTION
561  (numGlobalElements != GSTI &&
562  debugGlobalSum != numGlobalElements, std::invalid_argument,
563  errorMessagePrefix << "The sum of each process' number of "
564  "indices over all processes, " << debugGlobalSum << ", != "
565  << "numGlobalElements=" << numGlobalElements << ". If you "
566  "would like this constructor to compute numGlobalElements "
567  "for you, you may set numGlobalElements="
568  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
569  "on input. Please note that this is NOT necessarily -1.");
570  }
571  return debugGlobalSum;
572  }
573 
574  template <class LocalOrdinal, class GlobalOrdinal, class Node>
575  void
576  Map<LocalOrdinal,GlobalOrdinal,Node>::
577  initWithNonownedHostIndexList(
578  const char errorMessagePrefix[],
579  const global_size_t numGlobalElements,
580  const Kokkos::View<const global_ordinal_type*,
581  Kokkos::LayoutLeft,
582  Kokkos::HostSpace,
583  Kokkos::MemoryUnmanaged>& entryList_host,
584  const global_ordinal_type indexBase,
585  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
586  {
587  Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
588 
589  using Kokkos::LayoutLeft;
590  using Kokkos::subview;
591  using Kokkos::View;
592  using Kokkos::view_alloc;
593  using Kokkos::WithoutInitializing;
594  using Teuchos::as;
595  using Teuchos::broadcast;
596  using Teuchos::outArg;
597  using Teuchos::ptr;
598  using Teuchos::REDUCE_MAX;
599  using Teuchos::REDUCE_MIN;
600  using Teuchos::REDUCE_SUM;
601  using Teuchos::reduceAll;
602  using LO = local_ordinal_type;
603  using GO = global_ordinal_type;
604  using GST = global_size_t;
605  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
606  // Make sure that Kokkos has been initialized (Github Issue #513).
607  TEUCHOS_TEST_FOR_EXCEPTION
608  (! Kokkos::is_initialized (), std::runtime_error,
609  errorMessagePrefix << "The Kokkos execution space "
610  << Teuchos::TypeNameTraits<execution_space>::name()
611  << " has not been initialized. "
612  "Please initialize it before creating a Map.")
613 
614  // The user has specified the distribution of indices over the
615  // processes, via the input array of global indices on each
616  // process. The distribution is not necessarily contiguous or
617  // equally shared over the processes.
618 
619  // The length of the input array on this process is the number of
620  // local indices to associate with this process, even though the
621  // input array contains global indices. We assume that the number
622  // of local indices on a process can be stored in a size_t;
623  // numLocalElements_ is a size_t, so this variable and that should
624  // have the same type.
625  const size_t numLocalElements(entryList_host.size());
626 
627  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
628  numLocalElements, indexBase, comm);
629 
630  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
631  // reduction is redundant, since the directory Map will have to do
632  // the same thing. Thus, we could do the scan and broadcast for
633  // the directory Map here, and give the computed offsets to the
634  // directory Map's constructor. However, a reduction costs less
635  // than a scan and broadcast, so this still saves time if users of
636  // this Map don't ever need the Directory (i.e., if they never
637  // call getRemoteIndexList on this Map).
638  if (numGlobalElements != GSTI) {
639  numGlobalElements_ = numGlobalElements; // Use the user's value.
640  }
641  else { // The user wants us to compute the sum.
642  reduceAll(*comm, REDUCE_SUM,
643  static_cast<GST>(numLocalElements),
644  outArg(numGlobalElements_));
645  }
646 
647  // mfh 20 Feb 2013: We've never quite done the right thing for
648  // duplicate GIDs here. Duplicate GIDs have always been counted
649  // distinctly in numLocalElements_, and thus should get a
650  // different LID. However, we've always used std::map or a hash
651  // table for the GID -> LID lookup table, so distinct GIDs always
652  // map to the same LID. Furthermore, the order of the input GID
653  // list matters, so it's not desirable to sort for determining
654  // uniqueness.
655  //
656  // I've chosen for now to write this code as if the input GID list
657  // contains no duplicates. If this is not desired, we could use
658  // the lookup table itself to determine uniqueness: If we haven't
659  // seen the GID before, it gets a new LID and it's added to the
660  // LID -> GID and GID -> LID tables. If we have seen the GID
661  // before, it doesn't get added to either table. I would
662  // implement this, but it would cost more to do the double lookups
663  // in the table (one to check, and one to insert).
664  //
665  // More importantly, since we build the GID -> LID table in (a
666  // thread-) parallel (way), the order in which duplicate GIDs may
667  // get inserted is not defined. This would make the assignment of
668  // LID to GID nondeterministic.
669 
670  numLocalElements_ = numLocalElements;
671  indexBase_ = indexBase;
672 
673  minMyGID_ = indexBase_;
674  maxMyGID_ = indexBase_;
675 
676  // NOTE (mfh 27 May 2015): While finding the initial contiguous
677  // GID range requires looking at all the GIDs in the range,
678  // dismissing an interval of GIDs only requires looking at the
679  // first and last GIDs. Thus, we could do binary search backwards
680  // from the end in order to catch the common case of a contiguous
681  // interval followed by noncontiguous entries. On the other hand,
682  // we could just expose this case explicitly as yet another Map
683  // constructor, and avoid the trouble of detecting it.
684  if (numLocalElements_ > 0) {
685  // Find contiguous GID range, with the restriction that the
686  // beginning of the range starts with the first entry. While
687  // doing so, fill in the LID -> GID table.
688  typename decltype (lgMap_)::non_const_type lgMap
689  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
690  auto lgMap_host =
691  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
692 
693  // The input array entryList_host is already on host, so we
694  // don't need to take a host view of it.
695  // auto entryList_host =
696  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
697  // Kokkos::deep_copy (entryList_host, entryList);
698 
699  firstContiguousGID_ = entryList_host[0];
700  lastContiguousGID_ = firstContiguousGID_+1;
701 
702  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
703  // anyway, so we have to look at them all. The logical way to
704  // find the first noncontiguous entry would thus be to "reduce,"
705  // where the local reduction result is whether entryList[i] + 1
706  // == entryList[i+1].
707 
708  lgMap_host[0] = firstContiguousGID_;
709  size_t i = 1;
710  for ( ; i < numLocalElements_; ++i) {
711  const GO curGid = entryList_host[i];
712  const LO curLid = as<LO> (i);
713 
714  if (lastContiguousGID_ != curGid) break;
715 
716  // Add the entry to the LID->GID table only after we know that
717  // the current GID is in the initial contiguous sequence, so
718  // that we don't repeat adding it in the first iteration of
719  // the loop below over the remaining noncontiguous GIDs.
720  lgMap_host[curLid] = curGid;
721  ++lastContiguousGID_;
722  }
723  --lastContiguousGID_;
724  // NOTE: i is the first non-contiguous index.
725 
726  // [firstContiguousGID_, lastContigousGID_] is the initial
727  // sequence of contiguous GIDs. We can start the min and max
728  // GID using this range.
729  minMyGID_ = firstContiguousGID_;
730  maxMyGID_ = lastContiguousGID_;
731 
732  // Compute the GID -> LID lookup table, _not_ including the
733  // initial sequence of contiguous GIDs.
734  LO firstNonContiguous_loc=i;
735  {
736 
737  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
738  auto nonContigGids_host = subview (entryList_host, ncRange);
739  TEUCHOS_TEST_FOR_EXCEPTION
740  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
741  static_cast<size_t> (entryList_host.extent (0) - i),
742  std::logic_error, "Tpetra::Map noncontiguous constructor: "
743  "nonContigGids_host.extent(0) = "
744  << nonContigGids_host.extent (0)
745  << " != entryList_host.extent(0) - i = "
746  << (entryList_host.extent (0) - i) << " = "
747  << entryList_host.extent (0) << " - " << i
748  << ". Please report this bug to the Tpetra developers.");
749 
750  // FixedHashTable's constructor expects an owned device View,
751  // so we must deep-copy the subview of the input indices.
752  View<GO*, LayoutLeft, device_type>
753  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
754  nonContigGids_host.size ());
755 
756 
757  // DEEP_COPY REVIEW - HOST-TO-DEVICE
758  Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
759  Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
760 
761  glMap_ = global_to_local_table_type(nonContigGids,
762  firstNonContiguous_loc);
763  // Make host version - when memory spaces match these just do trivial assignment
764  glMapHost_ = global_to_local_table_host_type(glMap_);
765  }
766 
767  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
768  // table above, we have to look at all the (noncontiguous) input
769  // indices anyway. Thus, why not have the constructor compute
770  // and return the min and max?
771 
772  for ( ; i < numLocalElements_; ++i) {
773  const GO curGid = entryList_host[i];
774  const LO curLid = static_cast<LO> (i);
775  lgMap_host[curLid] = curGid; // LID -> GID table
776 
777  // While iterating through entryList, we compute its
778  // (process-local) min and max elements.
779  if (curGid < minMyGID_) {
780  minMyGID_ = curGid;
781  }
782  if (curGid > maxMyGID_) {
783  maxMyGID_ = curGid;
784  }
785  }
786 
787  // We filled lgMap on host above; now sync back to device.
788  // DEEP_COPY REVIEW - HOST-TO-DEVICE
789  Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
790 
791  // "Commit" the local-to-global lookup table we filled in above.
792  lgMap_ = lgMap;
793  // We've already created this, so use it.
794  lgMapHost_ = lgMap_host;
795 
796 
797  }
798  else {
799  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
800  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
801  // This insures tests for GIDs in the range
802  // [firstContiguousGID_, lastContiguousGID_] fail for processes
803  // with no local elements.
804  firstContiguousGID_ = indexBase_+1;
805  lastContiguousGID_ = indexBase_;
806  // glMap_ was default constructed, so it's already empty.
807 
808  }
809 
810 
811 
812 
813  // Compute the min and max of all processes' GIDs. If
814  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
815  // are both indexBase_. This is wrong, but fixing it would
816  // require either a fancy sparse all-reduce, or a custom reduction
817  // operator that ignores invalid values ("invalid" means
818  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
819  //
820  // Also, while we're at it, use the same all-reduce to figure out
821  // if the Map is distributed. "Distributed" means that there is
822  // at least one process with a number of local elements less than
823  // the number of global elements.
824  //
825  // We're computing the min and max of all processes' GIDs using a
826  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
827  // and y are signed). (This lets us combine the min and max into
828  // a single all-reduce.) If each process sets localDist=1 if its
829  // number of local elements is strictly less than the number of
830  // global elements, and localDist=0 otherwise, then a MAX
831  // all-reduce on localDist tells us if the Map is distributed (1
832  // if yes, 0 if no). Thus, we can append localDist onto the end
833  // of the data and get the global result from the all-reduce.
834  if (std::numeric_limits<GO>::is_signed) {
835  // Does my process NOT own all the elements?
836  const GO localDist =
837  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
838 
839  GO minMaxInput[3];
840  minMaxInput[0] = -minMyGID_;
841  minMaxInput[1] = maxMyGID_;
842  minMaxInput[2] = localDist;
843 
844  GO minMaxOutput[3];
845  minMaxOutput[0] = 0;
846  minMaxOutput[1] = 0;
847  minMaxOutput[2] = 0;
848  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
849  minAllGID_ = -minMaxOutput[0];
850  maxAllGID_ = minMaxOutput[1];
851  const GO globalDist = minMaxOutput[2];
852  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
853  }
854  else { // unsigned; use two reductions
855  // This is always correct, no matter the signedness of GO.
856  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
857  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
858  distributed_ = checkIsDist ();
859  }
860 
861  contiguous_ = false; // "Contiguous" is conservative.
862 
863  TEUCHOS_TEST_FOR_EXCEPTION(
864  minAllGID_ < indexBase_,
865  std::invalid_argument,
866  "Tpetra::Map constructor (noncontiguous): "
867  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
868  "less than the given indexBase = " << indexBase_ << ".");
869 
870  // Create the Directory on demand in getRemoteIndexList().
871  //setupDirectory ();
872  }
873 
874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
876  Map (const global_size_t numGlobalElements,
877  const GlobalOrdinal indexList[],
878  const LocalOrdinal indexListSize,
879  const GlobalOrdinal indexBase,
880  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
881  comm_ (comm),
882  uniform_ (false),
883  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
884  {
885  using std::endl;
886  const char funcName[] =
887  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
888 
889  const bool verbose = Details::Behavior::verbose("Map");
890  std::unique_ptr<std::string> prefix;
891  if (verbose) {
892  prefix = Details::createPrefix(
893  comm_.getRawPtr(), "Map", funcName);
894  std::ostringstream os;
895  os << *prefix << "Start" << endl;
896  std::cerr << os.str();
897  }
900  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
901  indexList, static_cast<size_t> (indexListSize),
902  comm.getRawPtr ());
903  // Not quite sure if I trust all code to behave correctly if the
904  // pointer is nonnull but the array length is nonzero, so I'll
905  // make sure the raw pointer is null if the length is zero.
906  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
907  Kokkos::View<const GlobalOrdinal*,
908  Kokkos::LayoutLeft,
909  Kokkos::HostSpace,
910  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
911  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
912  indexBase, comm);
913  if (verbose) {
914  std::ostringstream os;
915  os << *prefix << "Done" << endl;
916  std::cerr << os.str();
917  }
918  }
919 
920  template <class LocalOrdinal, class GlobalOrdinal, class Node>
922  Map (const global_size_t numGlobalElements,
923  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
924  const GlobalOrdinal indexBase,
925  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
926  comm_ (comm),
927  uniform_ (false),
928  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
929  {
930  using std::endl;
931  const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
932 
933  const bool verbose = Details::Behavior::verbose("Map");
934  std::unique_ptr<std::string> prefix;
935  if (verbose) {
936  prefix = Details::createPrefix(
937  comm_.getRawPtr(), "Map", funcName);
938  std::ostringstream os;
939  os << *prefix << "Start" << endl;
940  std::cerr << os.str();
941  }
944  const size_t numLclInds = static_cast<size_t> (entryList.size ());
945  checkMapInputArray ("(GST, ArrayView, GO, comm)",
946  entryList.getRawPtr (), numLclInds,
947  comm.getRawPtr ());
948  // Not quite sure if I trust both ArrayView and View to behave
949  // correctly if the pointer is nonnull but the array length is
950  // nonzero, so I'll make sure it's null if the length is zero.
951  const GlobalOrdinal* const indsRaw =
952  numLclInds == 0 ? NULL : entryList.getRawPtr ();
953  Kokkos::View<const GlobalOrdinal*,
954  Kokkos::LayoutLeft,
955  Kokkos::HostSpace,
956  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
957  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
958  indexBase, comm);
959  if (verbose) {
960  std::ostringstream os;
961  os << *prefix << "Done" << endl;
962  std::cerr << os.str();
963  }
964  }
965 
966 
967  template <class LocalOrdinal, class GlobalOrdinal, class Node>
969  Map (const global_size_t numGlobalElements,
970  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
971  const GlobalOrdinal indexBase,
972  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
973  comm_ (comm),
974  uniform_ (false),
975  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
976  {
977  using Kokkos::LayoutLeft;
978  using Kokkos::subview;
979  using Kokkos::View;
980  using Kokkos::view_alloc;
981  using Kokkos::WithoutInitializing;
982  using Teuchos::arcp;
983  using Teuchos::ArrayView;
984  using Teuchos::as;
985  using Teuchos::broadcast;
986  using Teuchos::outArg;
987  using Teuchos::ptr;
988  using Teuchos::REDUCE_MAX;
989  using Teuchos::REDUCE_MIN;
990  using Teuchos::REDUCE_SUM;
991  using Teuchos::reduceAll;
992  using Teuchos::typeName;
993  using std::endl;
994  using LO = local_ordinal_type;
995  using GO = global_ordinal_type;
996  using GST = global_size_t;
997  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
998  const char funcName[] =
999  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
1000 
1001  const bool verbose = Details::Behavior::verbose("Map");
1002  std::unique_ptr<std::string> prefix;
1003  if (verbose) {
1004  prefix = Details::createPrefix(
1005  comm_.getRawPtr(), "Map", funcName);
1006  std::ostringstream os;
1007  os << *prefix << "Start" << endl;
1008  std::cerr << os.str();
1009  }
1011  Tpetra::Details::ProfilingRegion pr(funcName);
1012  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
1013  entryList.data (),
1014  static_cast<size_t> (entryList.extent (0)),
1015  comm.getRawPtr ());
1016 
1017  // The user has specified the distribution of indices over the
1018  // processes, via the input array of global indices on each
1019  // process. The distribution is not necessarily contiguous or
1020  // equally shared over the processes.
1021 
1022  // The length of the input array on this process is the number of
1023  // local indices to associate with this process, even though the
1024  // input array contains global indices. We assume that the number
1025  // of local indices on a process can be stored in a size_t;
1026  // numLocalElements_ is a size_t, so this variable and that should
1027  // have the same type.
1028  const size_t numLocalElements(entryList.size());
1029 
1030  initialNonuniformDebugCheck(funcName, numGlobalElements,
1031  numLocalElements, indexBase, comm);
1032 
1033  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
1034  // reduction is redundant, since the directory Map will have to do
1035  // the same thing. Thus, we could do the scan and broadcast for
1036  // the directory Map here, and give the computed offsets to the
1037  // directory Map's constructor. However, a reduction costs less
1038  // than a scan and broadcast, so this still saves time if users of
1039  // this Map don't ever need the Directory (i.e., if they never
1040  // call getRemoteIndexList on this Map).
1041  if (numGlobalElements != GSTI) {
1042  numGlobalElements_ = numGlobalElements; // Use the user's value.
1043  }
1044  else { // The user wants us to compute the sum.
1045  reduceAll(*comm, REDUCE_SUM,
1046  static_cast<GST>(numLocalElements),
1047  outArg(numGlobalElements_));
1048  }
1049 
1050 
1051  // mfh 20 Feb 2013: We've never quite done the right thing for
1052  // duplicate GIDs here. Duplicate GIDs have always been counted
1053  // distinctly in numLocalElements_, and thus should get a
1054  // different LID. However, we've always used std::map or a hash
1055  // table for the GID -> LID lookup table, so distinct GIDs always
1056  // map to the same LID. Furthermore, the order of the input GID
1057  // list matters, so it's not desirable to sort for determining
1058  // uniqueness.
1059  //
1060  // I've chosen for now to write this code as if the input GID list
1061  // contains no duplicates. If this is not desired, we could use
1062  // the lookup table itself to determine uniqueness: If we haven't
1063  // seen the GID before, it gets a new LID and it's added to the
1064  // LID -> GID and GID -> LID tables. If we have seen the GID
1065  // before, it doesn't get added to either table. I would
1066  // implement this, but it would cost more to do the double lookups
1067  // in the table (one to check, and one to insert).
1068  //
1069  // More importantly, since we build the GID -> LID table in (a
1070  // thread-) parallel (way), the order in which duplicate GIDs may
1071  // get inserted is not defined. This would make the assignment of
1072  // LID to GID nondeterministic.
1073 
1074  numLocalElements_ = numLocalElements;
1075  indexBase_ = indexBase;
1076 
1077  minMyGID_ = indexBase_;
1078  maxMyGID_ = indexBase_;
1079 
1080  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1081  // GID range requires looking at all the GIDs in the range,
1082  // dismissing an interval of GIDs only requires looking at the
1083  // first and last GIDs. Thus, we could do binary search backwards
1084  // from the end in order to catch the common case of a contiguous
1085  // interval followed by noncontiguous entries. On the other hand,
1086  // we could just expose this case explicitly as yet another Map
1087  // constructor, and avoid the trouble of detecting it.
1088  if (numLocalElements_ > 0) {
1089  // Find contiguous GID range, with the restriction that the
1090  // beginning of the range starts with the first entry. While
1091  // doing so, fill in the LID -> GID table.
1092  typename decltype (lgMap_)::non_const_type lgMap
1093  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1094 
1095  // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1096  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1097  Kokkos::deep_copy(typename device_type::execution_space(),lgMap,entryList);
1098  LO lastContiguousGID_loc;
1099  computeConstantsOnDevice(entryList,minMyGID_,maxMyGID_,firstContiguousGID_,lastContiguousGID_,lastContiguousGID_loc);
1100  LO firstNonContiguous_loc = lastContiguousGID_loc+1;
1101  auto nonContigGids = Kokkos::subview(entryList,std::pair<size_t,size_t>(firstNonContiguous_loc,entryList.extent(0)));
1102 
1103  // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1104  glMap_ = global_to_local_table_type(nonContigGids,
1105  firstNonContiguous_loc);
1106 
1107  // "Commit" the local-to-global lookup table we filled in above.
1108  lgMap_ = lgMap;
1109 
1110  }
1111  else {
1112  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1113  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1114  // This insures tests for GIDs in the range
1115  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1116  // with no local elements.
1117  firstContiguousGID_ = indexBase_+1;
1118  lastContiguousGID_ = indexBase_;
1119  // glMap_ was default constructed, so it's already empty.
1120  }
1121 
1122 
1123  // Compute the min and max of all processes' GIDs. If
1124  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1125  // are both indexBase_. This is wrong, but fixing it would
1126  // require either a fancy sparse all-reduce, or a custom reduction
1127  // operator that ignores invalid values ("invalid" means
1128  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1129  //
1130  // Also, while we're at it, use the same all-reduce to figure out
1131  // if the Map is distributed. "Distributed" means that there is
1132  // at least one process with a number of local elements less than
1133  // the number of global elements.
1134  //
1135  // We're computing the min and max of all processes' GIDs using a
1136  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1137  // and y are signed). (This lets us combine the min and max into
1138  // a single all-reduce.) If each process sets localDist=1 if its
1139  // number of local elements is strictly less than the number of
1140  // global elements, and localDist=0 otherwise, then a MAX
1141  // all-reduce on localDist tells us if the Map is distributed (1
1142  // if yes, 0 if no). Thus, we can append localDist onto the end
1143  // of the data and get the global result from the all-reduce.
1144  if (std::numeric_limits<GO>::is_signed) {
1145  // Does my process NOT own all the elements?
1146  const GO localDist =
1147  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1148 
1149  GO minMaxInput[3];
1150  minMaxInput[0] = -minMyGID_;
1151  minMaxInput[1] = maxMyGID_;
1152  minMaxInput[2] = localDist;
1153 
1154  GO minMaxOutput[3];
1155  minMaxOutput[0] = 0;
1156  minMaxOutput[1] = 0;
1157  minMaxOutput[2] = 0;
1158  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1159  minAllGID_ = -minMaxOutput[0];
1160  maxAllGID_ = minMaxOutput[1];
1161  const GO globalDist = minMaxOutput[2];
1162  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1163  }
1164  else { // unsigned; use two reductions
1165  // This is always correct, no matter the signedness of GO.
1166  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1167  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1168  distributed_ = checkIsDist ();
1169  }
1170 
1171  contiguous_ = false; // "Contiguous" is conservative.
1172 
1173  TEUCHOS_TEST_FOR_EXCEPTION(
1174  minAllGID_ < indexBase_,
1175  std::invalid_argument,
1176  "Tpetra::Map constructor (noncontiguous): "
1177  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1178  "less than the given indexBase = " << indexBase_ << ".");
1179 
1180  // Create the Directory on demand in getRemoteIndexList().
1181  //setupDirectory ();
1182 
1183  if (verbose) {
1184  std::ostringstream os;
1185  os << *prefix << "Done" << endl;
1186  std::cerr << os.str();
1187  }
1188  }
1189 
1190 
1191  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1193  {
1194  if (! Kokkos::is_initialized ()) {
1195  std::ostringstream os;
1196  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1197  "Kokkos::finalize() has been called. This is user error! There are "
1198  "two likely causes: " << std::endl <<
1199  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1200  << std::endl <<
1201  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1202  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1203  "or Tpetra::finalize()." << std::endl << std::endl <<
1204  "Don't do either of these! Please refer to GitHib Issue #2372."
1205  << std::endl;
1206  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1207  this->getComm ().getRawPtr ());
1208  }
1209  else {
1210  using ::Tpetra::Details::mpiIsInitialized;
1211  using ::Tpetra::Details::mpiIsFinalized;
1212  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1213 
1214  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1215  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1216  mpiIsInitialized () && mpiIsFinalized ()) {
1217  // Tpetra itself does not require MPI, even if building with
1218  // MPI. It is legal to create Tpetra objects that do not use
1219  // MPI, even in an MPI program. However, calling Tpetra stuff
1220  // after MPI_Finalize() has been called is a bad idea, since
1221  // some Tpetra defaults may use MPI if available.
1222  std::ostringstream os;
1223  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1224  "MPI_Finalize() has been called. This is user error! There are "
1225  "two likely causes: " << std::endl <<
1226  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1227  << std::endl <<
1228  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1229  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1230  "Tpetra::finalize()." << std::endl << std::endl <<
1231  "Don't do either of these! Please refer to GitHib Issue #2372."
1232  << std::endl;
1233  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1234  }
1235  }
1236  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1237  // because Tpetra does not yet require Tpetra::initialize /
1238  // Tpetra::finalize.
1239  }
1240 
1241  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1242  bool
1244  {
1245  TEUCHOS_TEST_FOR_EXCEPTION(
1246  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1247  "getComm() returns null. Please report this bug to the Tpetra "
1248  "developers.");
1249 
1250  // This is a collective operation, if it hasn't been called before.
1251  setupDirectory ();
1252  return directory_->isOneToOne (*this);
1253  }
1254 
1255  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1256  LocalOrdinal
1258  getLocalElement (GlobalOrdinal globalIndex) const
1259  {
1260  if (isContiguous ()) {
1261  if (globalIndex < getMinGlobalIndex () ||
1262  globalIndex > getMaxGlobalIndex ()) {
1263  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1264  }
1265  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1266  }
1267  else if (globalIndex >= firstContiguousGID_ &&
1268  globalIndex <= lastContiguousGID_) {
1269  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1270  }
1271  else {
1272  // If the given global index is not in the table, this returns
1273  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1274  // glMapHost_ is Host and does not assume UVM
1275  lazyPushToHost();
1276  return glMapHost_.get (globalIndex);
1277  }
1278  }
1279 
1280  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1281  GlobalOrdinal
1283  getGlobalElement (LocalOrdinal localIndex) const
1284  {
1285  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1286  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1287  }
1288  if (isContiguous ()) {
1289  return getMinGlobalIndex () + localIndex;
1290  }
1291  else {
1292  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1293  // involvement. As a result, it is thread safe.
1294  //
1295  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1296  lazyPushToHost();
1297  return lgMapHost_[localIndex];
1298  }
1299  }
1300 
1301  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1302  bool
1304  isNodeLocalElement (LocalOrdinal localIndex) const
1305  {
1306  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1307  return false;
1308  } else {
1309  return true;
1310  }
1311  }
1312 
1313  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1314  bool
1316  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1317  return this->getLocalElement (globalIndex) !=
1318  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1319  }
1320 
1321  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1323  return uniform_;
1324  }
1325 
1326  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1328  return contiguous_;
1329  }
1330 
1331 
1332  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1336  {
1337  return local_map_type (glMap_, lgMap_, getIndexBase (),
1338  getMinGlobalIndex (), getMaxGlobalIndex (),
1339  firstContiguousGID_, lastContiguousGID_,
1340  getLocalNumElements (), isContiguous ());
1341  }
1342 
1343  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1344  bool
1347  {
1348  using Teuchos::outArg;
1349  using Teuchos::REDUCE_MIN;
1350  using Teuchos::reduceAll;
1351  //
1352  // Tests that avoid the Boolean all-reduce below by using
1353  // globally consistent quantities.
1354  //
1355  if (this == &map) {
1356  // Pointer equality on one process always implies pointer
1357  // equality on all processes, since Map is immutable.
1358  return true;
1359  }
1360  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1361  // The two communicators have different numbers of processes.
1362  // It's not correct to call isCompatible() in that case. This
1363  // may result in the all-reduce hanging below.
1364  return false;
1365  }
1366  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1367  // Two Maps are definitely NOT compatible if they have different
1368  // global numbers of indices.
1369  return false;
1370  }
1371  else if (isContiguous () && isUniform () &&
1372  map.isContiguous () && map.isUniform ()) {
1373  // Contiguous uniform Maps with the same number of processes in
1374  // their communicators, and with the same global numbers of
1375  // indices, are always compatible.
1376  return true;
1377  }
1378  else if (! isContiguous () && ! map.isContiguous () &&
1379  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1380  lgMap_.data () == map.lgMap_.data ()) {
1381  // Noncontiguous Maps whose global index lists are nonempty and
1382  // have the same pointer must be the same (and therefore
1383  // contiguous).
1384  //
1385  // Nonempty is important. For example, consider a communicator
1386  // with two processes, and two Maps that share this
1387  // communicator, with zero global indices on the first process,
1388  // and different nonzero numbers of global indices on the second
1389  // process. In that case, on the first process, the pointers
1390  // would both be NULL.
1391  return true;
1392  }
1393 
1394  TEUCHOS_TEST_FOR_EXCEPTION(
1395  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1396  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1397  "checked that this condition is true above, but it's false here. "
1398  "Please report this bug to the Tpetra developers.");
1399 
1400  // Do both Maps have the same number of indices on each process?
1401  const int locallyCompat =
1402  (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1403 
1404  int globallyCompat = 0;
1405  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1406  return (globallyCompat == 1);
1407  }
1408 
1409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1410  bool
1413  {
1414  using Teuchos::ArrayView;
1415  using GO = global_ordinal_type;
1416  using size_type = typename ArrayView<const GO>::size_type;
1417 
1418  // If both Maps are contiguous, we can compare their GID ranges
1419  // easily by looking at the min and max GID on this process.
1420  // Otherwise, we'll compare their GID lists. If only one Map is
1421  // contiguous, then we only have to call getLocalElementList() on
1422  // the noncontiguous Map. (It's best to avoid calling it on a
1423  // contiguous Map, since it results in unnecessary storage that
1424  // persists for the lifetime of the Map.)
1425 
1426  if (this == &map) {
1427  // Pointer equality on one process always implies pointer
1428  // equality on all processes, since Map is immutable.
1429  return true;
1430  }
1431  else if (getLocalNumElements () != map.getLocalNumElements ()) {
1432  return false;
1433  }
1434  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1435  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1436  return false;
1437  }
1438  else {
1439  if (isContiguous ()) {
1440  if (map.isContiguous ()) {
1441  return true; // min and max match, so the ranges match.
1442  }
1443  else { // *this is contiguous, but map is not contiguous
1444  TEUCHOS_TEST_FOR_EXCEPTION(
1445  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1446  "Tpetra::Map::locallySameAs: BUG");
1447  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1448  const GO minLhsGid = this->getMinGlobalIndex ();
1449  const size_type numRhsElts = rhsElts.size ();
1450  for (size_type k = 0; k < numRhsElts; ++k) {
1451  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1452  if (curLhsGid != rhsElts[k]) {
1453  return false; // stop on first mismatch
1454  }
1455  }
1456  return true;
1457  }
1458  }
1459  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1460  TEUCHOS_TEST_FOR_EXCEPTION(
1461  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1462  "Tpetra::Map::locallySameAs: BUG");
1463  ArrayView<const GO> lhsElts = this->getLocalElementList ();
1464  const GO minRhsGid = map.getMinGlobalIndex ();
1465  const size_type numLhsElts = lhsElts.size ();
1466  for (size_type k = 0; k < numLhsElts; ++k) {
1467  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1468  if (curRhsGid != lhsElts[k]) {
1469  return false; // stop on first mismatch
1470  }
1471  }
1472  return true;
1473  }
1474  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1475  // Pointers to LID->GID "map" (actually just an array) are the
1476  // same, and the number of GIDs are the same.
1477  return this->getLocalNumElements () == map.getLocalNumElements ();
1478  }
1479  else { // we actually have to compare the GIDs
1480  if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1481  return false; // We already checked above, but check just in case
1482  }
1483  else {
1484  ArrayView<const GO> lhsElts = getLocalElementList ();
1485  ArrayView<const GO> rhsElts = map.getLocalElementList ();
1486 
1487  // std::equal requires that the latter range is as large as
1488  // the former. We know the ranges have equal length, because
1489  // they have the same number of local entries.
1490  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1491  }
1492  }
1493  }
1494  }
1495 
1496  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1497  bool
1500  {
1501  if (this == &map)
1502  return true;
1503 
1504  // We are going to check if lmap1 is fitted into lmap2:
1505  // Is lmap1 (map) a subset of lmap2 (this)?
1506  // And do the first lmap1.getLocalNumElements() global elements
1507  // of lmap1,lmap2 owned on each process exactly match?
1508  auto lmap1 = map.getLocalMap();
1509  auto lmap2 = this->getLocalMap();
1510 
1511  auto numLocalElements1 = lmap1.getLocalNumElements();
1512  auto numLocalElements2 = lmap2.getLocalNumElements();
1513 
1514  if (numLocalElements1 > numLocalElements2) {
1515  // There are more indices in the first map on this process than in second map.
1516  return false;
1517  }
1518 
1519  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1520  // When both Maps are contiguous, just check the interval inclusion.
1521  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1522  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1523  }
1524 
1525  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1526  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1527  // The second map does not include the first map bounds, and thus some of
1528  // the first map global indices are not in the second map.
1529  return false;
1530  }
1531 
1532  using LO = local_ordinal_type;
1533  using range_type =
1534  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1535 
1536  // Check all elements.
1537  LO numDiff = 0;
1538  Kokkos::parallel_reduce(
1539  "isLocallyFitted",
1540  range_type(0, numLocalElements1),
1541  KOKKOS_LAMBDA (const LO i, LO& diff) {
1542  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1543  }, numDiff);
1544 
1545  return (numDiff == 0);
1546  }
1547 
1548  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1549  bool
1552  {
1553  using Teuchos::outArg;
1554  using Teuchos::REDUCE_MIN;
1555  using Teuchos::reduceAll;
1556  //
1557  // Tests that avoid the Boolean all-reduce below by using
1558  // globally consistent quantities.
1559  //
1560  if (this == &map) {
1561  // Pointer equality on one process always implies pointer
1562  // equality on all processes, since Map is immutable.
1563  return true;
1564  }
1565  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1566  // The two communicators have different numbers of processes.
1567  // It's not correct to call isSameAs() in that case. This
1568  // may result in the all-reduce hanging below.
1569  return false;
1570  }
1571  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1572  // Two Maps are definitely NOT the same if they have different
1573  // global numbers of indices.
1574  return false;
1575  }
1576  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1577  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1578  getIndexBase () != map.getIndexBase ()) {
1579  // If the global min or max global index doesn't match, or if
1580  // the index base doesn't match, then the Maps aren't the same.
1581  return false;
1582  }
1583  else if (isDistributed () != map.isDistributed ()) {
1584  // One Map is distributed and the other is not, which means that
1585  // the Maps aren't the same.
1586  return false;
1587  }
1588  else if (isContiguous () && isUniform () &&
1589  map.isContiguous () && map.isUniform ()) {
1590  // Contiguous uniform Maps with the same number of processes in
1591  // their communicators, with the same global numbers of indices,
1592  // and with matching index bases and ranges, must be the same.
1593  return true;
1594  }
1595 
1596  // The two communicators must have the same number of processes,
1597  // with process ranks occurring in the same order. This uses
1598  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1599  // "Operations that access communicators are local and their
1600  // execution does not require interprocess communication."
1601  // However, just to be sure, I'll put this call after the above
1602  // tests that don't communicate.
1603  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1604  return false;
1605  }
1606 
1607  // If we get this far, we need to check local properties and then
1608  // communicate local sameness across all processes.
1609  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1610 
1611  // Return true if and only if all processes report local sameness.
1612  int isSame_gbl = 0;
1613  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1614  return isSame_gbl == 1;
1615  }
1616 
1617  namespace { // (anonymous)
1618  template <class LO, class GO, class DT>
1619  class FillLgMap {
1620  public:
1621  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1622  const GO startGid) :
1623  lgMap_ (lgMap), startGid_ (startGid)
1624  {
1625  Kokkos::RangePolicy<LO, typename DT::execution_space>
1626  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1627  Kokkos::parallel_for (range, *this);
1628  }
1629 
1630  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1631  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1632  }
1633 
1634  private:
1635  const Kokkos::View<GO*, DT> lgMap_;
1636  const GO startGid_;
1637  };
1638 
1639  } // namespace (anonymous)
1640 
1641 
1642  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1643  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1645  {
1646  using std::endl;
1647  using LO = local_ordinal_type;
1648  using GO = global_ordinal_type;
1649  using const_lg_view_type = decltype(lgMap_);
1650  using lg_view_type = typename const_lg_view_type::non_const_type;
1651  const bool debug = Details::Behavior::debug("Map");
1652  const bool verbose = Details::Behavior::verbose("Map");
1653 
1654  std::unique_ptr<std::string> prefix;
1655  if (verbose) {
1656  prefix = Details::createPrefix(
1657  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1658  std::ostringstream os;
1659  os << *prefix << "Start" << endl;
1660  std::cerr << os.str();
1661  }
1662 
1663  // If the local-to-global mapping doesn't exist yet, and if we
1664  // have local entries, then create and fill the local-to-global
1665  // mapping.
1666  const bool needToCreateLocalToGlobalMapping =
1667  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1668 
1669  if (needToCreateLocalToGlobalMapping) {
1670  if (verbose) {
1671  std::ostringstream os;
1672  os << *prefix << "Need to create lgMap" << endl;
1673  std::cerr << os.str();
1674  }
1675  if (debug) {
1676  // The local-to-global mapping should have been set up already
1677  // for a noncontiguous map.
1678  TEUCHOS_TEST_FOR_EXCEPTION
1679  (! isContiguous(), std::logic_error,
1680  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1681  "mapping (lgMap_) should have been set up already for a "
1682  "noncontiguous Map. Please report this bug to the Tpetra "
1683  "developers.");
1684  }
1685  const LO numElts = static_cast<LO> (getLocalNumElements ());
1686 
1687  using Kokkos::view_alloc;
1688  using Kokkos::WithoutInitializing;
1689  lg_view_type lgMap ("lgMap", numElts);
1690  if (verbose) {
1691  std::ostringstream os;
1692  os << *prefix << "Fill lgMap" << endl;
1693  std::cerr << os.str();
1694  }
1695  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1696 
1697  if (verbose) {
1698  std::ostringstream os;
1699  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1700  std::cerr << os.str();
1701  }
1702 
1703  auto lgMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1704  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1705  auto exec_instance = execution_space();
1706  Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1707 
1708  // There's a non-trivial chance we'll grab this on the host,
1709  // so let's make sure the copy finishes
1710  exec_instance.fence();
1711 
1712  // "Commit" the local-to-global lookup table we filled in above.
1713  lgMap_ = lgMap;
1714  lgMapHost_ = lgMapHost;
1715  }
1716  else {
1717  lazyPushToHost();
1718  }
1719 
1720  if (verbose) {
1721  std::ostringstream os;
1722  os << *prefix << "Done" << endl;
1723  std::cerr << os.str();
1724  }
1725  return lgMapHost_;
1726  }
1727 
1728 
1729  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1730  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_device_type
1732  {
1733  using std::endl;
1734  using LO = local_ordinal_type;
1735  using GO = global_ordinal_type;
1736  using const_lg_view_type = decltype(lgMap_);
1737  using lg_view_type = typename const_lg_view_type::non_const_type;
1738  const bool debug = Details::Behavior::debug("Map");
1739  const bool verbose = Details::Behavior::verbose("Map");
1740 
1741  std::unique_ptr<std::string> prefix;
1742  if (verbose) {
1743  prefix = Details::createPrefix(
1744  comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1745  std::ostringstream os;
1746  os << *prefix << "Start" << endl;
1747  std::cerr << os.str();
1748  }
1749 
1750  // If the local-to-global mapping doesn't exist yet, and if we
1751  // have local entries, then create and fill the local-to-global
1752  // mapping.
1753  const bool needToCreateLocalToGlobalMapping =
1754  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1755 
1756  if (needToCreateLocalToGlobalMapping) {
1757  if (verbose) {
1758  std::ostringstream os;
1759  os << *prefix << "Need to create lgMap" << endl;
1760  std::cerr << os.str();
1761  }
1762  if (debug) {
1763  // The local-to-global mapping should have been set up already
1764  // for a noncontiguous map.
1765  TEUCHOS_TEST_FOR_EXCEPTION
1766  (! isContiguous(), std::logic_error,
1767  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1768  "mapping (lgMap_) should have been set up already for a "
1769  "noncontiguous Map. Please report this bug to the Tpetra "
1770  "developers.");
1771  }
1772  const LO numElts = static_cast<LO> (getLocalNumElements ());
1773 
1774  using Kokkos::view_alloc;
1775  using Kokkos::WithoutInitializing;
1776  lg_view_type lgMap ("lgMap", numElts);
1777  if (verbose) {
1778  std::ostringstream os;
1779  os << *prefix << "Fill lgMap" << endl;
1780  std::cerr << os.str();
1781  }
1782  FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1783 
1784  // "Commit" the local-to-global lookup table we filled in above.
1785  lgMap_ = lgMap;
1786  }
1787 
1788  if (verbose) {
1789  std::ostringstream os;
1790  os << *prefix << "Done" << endl;
1791  std::cerr << os.str();
1792  }
1793  return lgMap_;
1794  }
1795 
1796 
1797 
1798  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1799  Teuchos::ArrayView<const GlobalOrdinal>
1801  {
1802  using GO = global_ordinal_type;
1803 
1804  // If the local-to-global mapping doesn't exist yet, and if we
1805  // have local entries, then create and fill the local-to-global
1806  // mapping.
1807  (void) this->getMyGlobalIndices ();
1808 
1809  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1810  lazyPushToHost();
1811  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1812  // The third argument forces ArrayView not to try to track memory
1813  // in a debug build. We have to use it because the memory does
1814  // not belong to a Teuchos memory management class.
1815  return Teuchos::ArrayView<const GO>(
1816  lgMapHostRawPtr,
1817  lgMapHost_.extent (0),
1818  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1819  }
1820 
1821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1823  return distributed_;
1824  }
1825 
1826  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1828  using Teuchos::TypeNameTraits;
1829  std::ostringstream os;
1830 
1831  os << "Tpetra::Map: {"
1832  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1833  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1834  << ", NodeType: " << TypeNameTraits<Node>::name ();
1835  if (this->getObjectLabel () != "") {
1836  os << ", Label: \"" << this->getObjectLabel () << "\"";
1837  }
1838  os << ", Global number of entries: " << getGlobalNumElements ()
1839  << ", Number of processes: " << getComm ()->getSize ()
1840  << ", Uniform: " << (isUniform () ? "true" : "false")
1841  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1842  << ", Distributed: " << (isDistributed () ? "true" : "false")
1843  << "}";
1844  return os.str ();
1845  }
1846 
1851  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1852  std::string
1854  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1855  {
1856  using LO = local_ordinal_type;
1857  using std::endl;
1858 
1859  // This preserves current behavior of Map.
1860  if (vl < Teuchos::VERB_HIGH) {
1861  return std::string ();
1862  }
1863  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1864  Teuchos::RCP<Teuchos::FancyOStream> outp =
1865  Teuchos::getFancyOStream (outStringP);
1866  Teuchos::FancyOStream& out = *outp;
1867 
1868  auto comm = this->getComm ();
1869  const int myRank = comm->getRank ();
1870  const int numProcs = comm->getSize ();
1871  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1872  Teuchos::OSTab tab1 (out);
1873 
1874  const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1875  out << "My number of entries: " << numEnt << endl
1876  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1877  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1878 
1879  if (vl == Teuchos::VERB_EXTREME) {
1880  out << "My global indices: [";
1881  const LO minLclInd = this->getMinLocalIndex ();
1882  for (LO k = 0; k < numEnt; ++k) {
1883  out << minLclInd + this->getGlobalElement (k);
1884  if (k + 1 < numEnt) {
1885  out << ", ";
1886  }
1887  }
1888  out << "]" << endl;
1889  }
1890 
1891  out.flush (); // make sure the ostringstream got everything
1892  return outStringP->str ();
1893  }
1894 
1895  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1896  void
1898  describe (Teuchos::FancyOStream &out,
1899  const Teuchos::EVerbosityLevel verbLevel) const
1900  {
1901  using Teuchos::TypeNameTraits;
1902  using Teuchos::VERB_DEFAULT;
1903  using Teuchos::VERB_NONE;
1904  using Teuchos::VERB_LOW;
1905  using Teuchos::VERB_HIGH;
1906  using std::endl;
1907  using LO = local_ordinal_type;
1908  using GO = global_ordinal_type;
1909  const Teuchos::EVerbosityLevel vl =
1910  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1911 
1912  if (vl == VERB_NONE) {
1913  return; // don't print anything
1914  }
1915  // If this Map's Comm is null, then the Map does not participate
1916  // in collective operations with the other processes. In that
1917  // case, it is not even legal to call this method. The reasonable
1918  // thing to do in that case is nothing.
1919  auto comm = this->getComm ();
1920  if (comm.is_null ()) {
1921  return;
1922  }
1923  const int myRank = comm->getRank ();
1924  const int numProcs = comm->getSize ();
1925 
1926  // Only Process 0 should touch the output stream, but this method
1927  // in general may need to do communication. Thus, we may need to
1928  // preserve the current tab level across multiple "if (myRank ==
1929  // 0) { ... }" inner scopes. This is why we sometimes create
1930  // OSTab instances by pointer, instead of by value. We only need
1931  // to create them by pointer if the tab level must persist through
1932  // multiple inner scopes.
1933  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1934 
1935  if (myRank == 0) {
1936  // At every verbosity level but VERB_NONE, Process 0 prints.
1937  // By convention, describe() always begins with a tab before
1938  // printing.
1939  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1940  out << "\"Tpetra::Map\":" << endl;
1941  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1942  {
1943  out << "Template parameters:" << endl;
1944  Teuchos::OSTab tab2 (out);
1945  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1946  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1947  << "Node: " << TypeNameTraits<Node>::name () << endl;
1948  }
1949  const std::string label = this->getObjectLabel ();
1950  if (label != "") {
1951  out << "Label: \"" << label << "\"" << endl;
1952  }
1953  out << "Global number of entries: " << getGlobalNumElements () << endl
1954  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1955  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1956  << "Index base: " << getIndexBase () << endl
1957  << "Number of processes: " << numProcs << endl
1958  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1959  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1960  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1961  }
1962 
1963  // This is collective over the Map's communicator.
1964  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1965  const std::string lclStr = this->localDescribeToString (vl);
1966  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1967  }
1968  }
1969 
1970  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1971  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1973  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1974  {
1975  using Teuchos::RCP;
1976  using Teuchos::rcp;
1977  using GST = global_size_t;
1978  using LO = local_ordinal_type;
1979  using GO = global_ordinal_type;
1980  using map_type = Map<LO, GO, Node>;
1981 
1982  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1983  // the Map by calling its ordinary public constructor, using the
1984  // original Map's data. This only involves O(1) all-reduces over
1985  // the new communicator, which in the common case only includes a
1986  // small number of processes.
1987 
1988  // Create the Map to return.
1989  if (newComm.is_null () || newComm->getSize () < 1) {
1990  return Teuchos::null; // my process does not participate in the new Map
1991  }
1992  else if (newComm->getSize () == 1) {
1993  lazyPushToHost();
1994 
1995  // The case where the new communicator has only one process is
1996  // easy. We don't have to communicate to get all the
1997  // information we need. Use the default comm to create the new
1998  // Map, then fill in all the fields directly.
1999  RCP<map_type> newMap (new map_type ());
2000 
2001  newMap->comm_ = newComm;
2002  // mfh 07 Oct 2016: Preserve original behavior, even though the
2003  // original index base may no longer be the globally min global
2004  // index. See #616 for why this doesn't matter so much anymore.
2005  newMap->indexBase_ = this->indexBase_;
2006  newMap->numGlobalElements_ = this->numLocalElements_;
2007  newMap->numLocalElements_ = this->numLocalElements_;
2008  newMap->minMyGID_ = this->minMyGID_;
2009  newMap->maxMyGID_ = this->maxMyGID_;
2010  newMap->minAllGID_ = this->minMyGID_;
2011  newMap->maxAllGID_ = this->maxMyGID_;
2012  newMap->firstContiguousGID_ = this->firstContiguousGID_;
2013  newMap->lastContiguousGID_ = this->lastContiguousGID_;
2014  // Since the new communicator has only one process, neither
2015  // uniformity nor contiguity have changed.
2016  newMap->uniform_ = this->uniform_;
2017  newMap->contiguous_ = this->contiguous_;
2018  // The new communicator only has one process, so the new Map is
2019  // not distributed.
2020  newMap->distributed_ = false;
2021  newMap->lgMap_ = this->lgMap_;
2022  newMap->lgMapHost_ = this->lgMapHost_;
2023  newMap->glMap_ = this->glMap_;
2024  newMap->glMapHost_ = this->glMapHost_;
2025  // It's OK not to initialize the new Map's Directory.
2026  // This is initialized lazily, on first call to getRemoteIndexList.
2027 
2028  return newMap;
2029  }
2030  else { // newComm->getSize() != 1
2031  // Even if the original Map is contiguous, the new Map might not
2032  // be, especially if the excluded processes have ranks != 0 or
2033  // newComm->getSize()-1. The common case for this method is to
2034  // exclude many (possibly even all but one) processes, so it
2035  // likely doesn't pay to do the global communication (over the
2036  // original communicator) to figure out whether we can optimize
2037  // the result Map. Thus, we just set up the result Map as
2038  // noncontiguous.
2039  //
2040  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
2041  // the global-to-local table, etc. Optimize this code path to
2042  // avoid unnecessary local work.
2043 
2044  // Make Map (re)compute the global number of elements.
2045  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2046  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
2047  // to use the noncontiguous Map constructor, since the new Map
2048  // might not be contiguous. Even if the old Map was contiguous,
2049  // some process in the "middle" might have been excluded. If we
2050  // want to avoid local work, we either have to do the setup by
2051  // hand, or write a new Map constructor.
2052 #if 1
2053  // The disabled code here throws the following exception in
2054  // Map's replaceCommWithSubset test:
2055  //
2056  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
2057  // 10:
2058  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2059  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2060 
2061  auto lgMap = this->getMyGlobalIndices ();
2062  using size_type =
2063  typename std::decay<decltype (lgMap.extent (0)) >::type;
2064  const size_type lclNumInds =
2065  static_cast<size_type> (this->getLocalNumElements ());
2066  using Teuchos::TypeNameTraits;
2067  TEUCHOS_TEST_FOR_EXCEPTION
2068  (lgMap.extent (0) != lclNumInds, std::logic_error,
2069  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2070  "has length " << lgMap.extent (0) << " (of type " <<
2071  TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2072  " = " << this->getLocalNumElements () << ". The latter, upon being "
2073  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2074  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2075  "developers.");
2076 #else
2077  Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2078 #endif // 1
2079 
2080  const GO indexBase = this->getIndexBase ();
2081  // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2082  auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2083  return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2084  }
2085  }
2086 
2087  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2088  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2091  {
2092  using Teuchos::Comm;
2093  using Teuchos::null;
2094  using Teuchos::outArg;
2095  using Teuchos::RCP;
2096  using Teuchos::rcp;
2097  using Teuchos::REDUCE_MIN;
2098  using Teuchos::reduceAll;
2099 
2100  // Create the new communicator. split() returns a valid
2101  // communicator on all processes. On processes where color == 0,
2102  // ignore the result. Passing key == 0 tells MPI to order the
2103  // processes in the new communicator by their rank in the old
2104  // communicator.
2105  const int color = (numLocalElements_ == 0) ? 0 : 1;
2106  // MPI_Comm_split must be called collectively over the original
2107  // communicator. We can't just call it on processes with color
2108  // one, even though we will ignore its result on processes with
2109  // color zero.
2110  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2111  if (color == 0) {
2112  newComm = null;
2113  }
2114 
2115  // Create the Map to return.
2116  if (newComm.is_null ()) {
2117  return null; // my process does not participate in the new Map
2118  } else {
2119  RCP<Map> map = rcp (new Map ());
2120 
2121  map->comm_ = newComm;
2122  map->indexBase_ = indexBase_;
2123  map->numGlobalElements_ = numGlobalElements_;
2124  map->numLocalElements_ = numLocalElements_;
2125  map->minMyGID_ = minMyGID_;
2126  map->maxMyGID_ = maxMyGID_;
2127  map->minAllGID_ = minAllGID_;
2128  map->maxAllGID_ = maxAllGID_;
2129  map->firstContiguousGID_= firstContiguousGID_;
2130  map->lastContiguousGID_ = lastContiguousGID_;
2131 
2132  // Uniformity and contiguity have not changed. The directory
2133  // has changed, but we've taken care of that above.
2134  map->uniform_ = uniform_;
2135  map->contiguous_ = contiguous_;
2136 
2137  // If the original Map was NOT distributed, then the new Map
2138  // cannot be distributed.
2139  //
2140  // If the number of processes in the new communicator is 1, then
2141  // the new Map is not distributed.
2142  //
2143  // Otherwise, we have to check the new Map using an all-reduce
2144  // (over the new communicator). For example, the original Map
2145  // may have had some processes with zero elements, and all other
2146  // processes with the same number of elements as in the whole
2147  // Map. That Map is technically distributed, because of the
2148  // processes with zero elements. Removing those processes would
2149  // make the new Map locally replicated.
2150  if (! distributed_ || newComm->getSize () == 1) {
2151  map->distributed_ = false;
2152  } else {
2153  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2154  int allProcsOwnAllGids = 0;
2155  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2156  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2157  }
2158 
2159  map->lgMap_ = lgMap_;
2160  map->lgMapHost_ = lgMapHost_;
2161  map->glMap_ = glMap_;
2162  map->glMapHost_ = glMapHost_;
2163 
2164  // Map's default constructor creates an uninitialized Directory.
2165  // The Directory will be initialized on demand in
2166  // getRemoteIndexList().
2167  //
2168  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2169  // directory more efficiently than just recreating it. If
2170  // directory recreation proves a bottleneck, we can always
2171  // revisit this. On the other hand, Directory creation is only
2172  // collective over the new, presumably much smaller
2173  // communicator, so it may not be worth the effort to optimize.
2174 
2175  return map;
2176  }
2177  }
2178 
2179  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2180  void
2182  {
2183  TEUCHOS_TEST_FOR_EXCEPTION(
2184  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2185  "The Directory is null. "
2186  "Please report this bug to the Tpetra developers.");
2187 
2188  // Only create the Directory if it hasn't been created yet.
2189  // This is a collective operation.
2190  if (! directory_->initialized ()) {
2191  directory_->initialize (*this);
2192  }
2193  }
2194 
2195  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2196  LookupStatus
2198  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2199  const Teuchos::ArrayView<int>& PIDs,
2200  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2201  {
2202  using Tpetra::Details::OrdinalTraits;
2204  using std::endl;
2205  using size_type = Teuchos::ArrayView<int>::size_type;
2206 
2207  const bool verbose = Details::Behavior::verbose("Map");
2208  const size_t maxNumToPrint = verbose ?
2210  std::unique_ptr<std::string> prefix;
2211  if (verbose) {
2212  prefix = Details::createPrefix(comm_.getRawPtr(),
2213  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2214  std::ostringstream os;
2215  os << *prefix << "Start: ";
2216  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2217  os << endl;
2218  std::cerr << os.str();
2219  }
2220 
2221  // Empty Maps (i.e., containing no indices on any processes in the
2222  // Map's communicator) are perfectly valid. In that case, if the
2223  // input GID list is nonempty, we fill the output arrays with
2224  // invalid values, and return IDNotPresent to notify the caller.
2225  // It's perfectly valid to give getRemoteIndexList GIDs that the
2226  // Map doesn't own. SubmapImport test 2 needs this functionality.
2227  if (getGlobalNumElements () == 0) {
2228  if (GIDs.size () == 0) {
2229  if (verbose) {
2230  std::ostringstream os;
2231  os << *prefix << "Done; both Map & input are empty" << endl;
2232  std::cerr << os.str();
2233  }
2234  return AllIDsPresent; // trivially
2235  }
2236  else {
2237  if (verbose) {
2238  std::ostringstream os;
2239  os << *prefix << "Done: Map is empty on all processes, "
2240  "so all output PIDs & LIDs are invalid (-1)." << endl;
2241  std::cerr << os.str();
2242  }
2243  for (size_type k = 0; k < PIDs.size (); ++k) {
2244  PIDs[k] = OrdinalTraits<int>::invalid ();
2245  }
2246  for (size_type k = 0; k < LIDs.size (); ++k) {
2247  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2248  }
2249  return IDNotPresent;
2250  }
2251  }
2252 
2253  // getRemoteIndexList must be called collectively, and Directory
2254  // initialization is collective too, so it's OK to initialize the
2255  // Directory on demand.
2256 
2257  if (verbose) {
2258  std::ostringstream os;
2259  os << *prefix << "Call setupDirectory" << endl;
2260  std::cerr << os.str();
2261  }
2262  setupDirectory();
2263  if (verbose) {
2264  std::ostringstream os;
2265  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2266  std::cerr << os.str();
2267  }
2268  const Tpetra::LookupStatus retVal =
2269  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2270  if (verbose) {
2271  std::ostringstream os;
2272  os << *prefix << "Done; getDirectoryEntries returned "
2273  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2274  << "; ";
2275  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2276  os << ", ";
2277  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2278  os << endl;
2279  std::cerr << os.str();
2280  }
2281  return retVal;
2282  }
2283 
2284  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2285  LookupStatus
2287  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2288  const Teuchos::ArrayView<int> & PIDs) const
2289  {
2291  using std::endl;
2292 
2293  const bool verbose = Details::Behavior::verbose("Map");
2294  const size_t maxNumToPrint = verbose ?
2296  std::unique_ptr<std::string> prefix;
2297  if (verbose) {
2298  prefix = Details::createPrefix(comm_.getRawPtr(),
2299  "Map", "getRemoteIndexList(GIDs,PIDs)");
2300  std::ostringstream os;
2301  os << *prefix << "Start: ";
2302  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2303  os << endl;
2304  std::cerr << os.str();
2305  }
2306 
2307  if (getGlobalNumElements () == 0) {
2308  if (GIDs.size () == 0) {
2309  if (verbose) {
2310  std::ostringstream os;
2311  os << *prefix << "Done; both Map & input are empty" << endl;
2312  std::cerr << os.str();
2313  }
2314  return AllIDsPresent; // trivially
2315  }
2316  else {
2317  if (verbose) {
2318  std::ostringstream os;
2319  os << *prefix << "Done: Map is empty on all processes, "
2320  "so all output PIDs are invalid (-1)." << endl;
2321  std::cerr << os.str();
2322  }
2323  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2324  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2325  }
2326  return IDNotPresent;
2327  }
2328  }
2329 
2330  // getRemoteIndexList must be called collectively, and Directory
2331  // initialization is collective too, so it's OK to initialize the
2332  // Directory on demand.
2333 
2334  if (verbose) {
2335  std::ostringstream os;
2336  os << *prefix << "Call setupDirectory" << endl;
2337  std::cerr << os.str();
2338  }
2339  setupDirectory();
2340  if (verbose) {
2341  std::ostringstream os;
2342  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2343  std::cerr << os.str();
2344  }
2345  const Tpetra::LookupStatus retVal =
2346  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2347  if (verbose) {
2348  std::ostringstream os;
2349  os << *prefix << "Done; getDirectoryEntries returned "
2350  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2351  << "; ";
2352  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2353  os << endl;
2354  std::cerr << os.str();
2355  }
2356  return retVal;
2357  }
2358 
2359  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2360  void
2362  using exec_space = typename Node::device_type::execution_space;
2363  if(lgMap_.extent(0) != lgMapHost_.extent(0)) {
2364  Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2365  // NOTE: We check lgMap_ and not glMap_, since the latter can
2366  // be somewhat error prone for contiguous maps
2367 
2368  // create_mirror_view preserves const-ness. create_mirror does not
2369  auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace (), lgMap_);
2370 
2371  // Since this was computed on the default stream, we can copy on the stream and then fence
2372  // the stream
2373  Kokkos::deep_copy(exec_space(),lgMap_host,lgMap_);
2374  exec_space().fence();
2375  lgMapHost_ = lgMap_host;
2376 
2377  // Make host version - when memory spaces match these just do trivial assignment
2378  glMapHost_ = global_to_local_table_host_type(glMap_);
2379  }
2380  }
2381 
2382 
2383  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2384  Teuchos::RCP<const Teuchos::Comm<int> >
2386  return comm_;
2387  }
2388 
2389  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2390  bool
2392  checkIsDist() const
2393  {
2394  using Teuchos::as;
2395  using Teuchos::outArg;
2396  using Teuchos::REDUCE_MIN;
2397  using Teuchos::reduceAll;
2398  using std::endl;
2399 
2400  const bool verbose = Details::Behavior::verbose("Map");
2401  std::unique_ptr<std::string> prefix;
2402  if (verbose) {
2403  prefix = Details::createPrefix(
2404  comm_.getRawPtr(), "Map", "checkIsDist");
2405  std::ostringstream os;
2406  os << *prefix << "Start" << endl;
2407  std::cerr << os.str();
2408  }
2409 
2410  bool global = false;
2411  if (comm_->getSize () > 1) {
2412  // The communicator has more than one process, but that doesn't
2413  // necessarily mean the Map is distributed.
2414  int localRep = 0;
2415  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2416  // The number of local elements on this process equals the
2417  // number of global elements.
2418  //
2419  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2420  // duplicates in the global ID list on input (the third Map
2421  // constructor), so that the number of local elements (which
2422  // are not duplicated) on this process could be less than the
2423  // number of global elements, even if this process owns all
2424  // the elements?
2425  localRep = 1;
2426  }
2427  int allLocalRep;
2428  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2429  if (allLocalRep != 1) {
2430  // At least one process does not own all the elements.
2431  // This makes the Map a distributed Map.
2432  global = true;
2433  }
2434  }
2435  // If the communicator has only one process, then the Map is not
2436  // distributed.
2437 
2438  if (verbose) {
2439  std::ostringstream os;
2440  os << *prefix << "Done; global=" << (global ? "true" : "false")
2441  << endl;
2442  std::cerr << os.str();
2443  }
2444  return global;
2445  }
2446 
2447 } // namespace Tpetra
2448 
2449 template <class LocalOrdinal, class GlobalOrdinal>
2450 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2451 Tpetra::createLocalMap (const size_t numElements,
2452  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2453 {
2454  typedef LocalOrdinal LO;
2455  typedef GlobalOrdinal GO;
2456  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2457  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2458 }
2459 
2460 template <class LocalOrdinal, class GlobalOrdinal>
2461 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2462 Tpetra::createUniformContigMap (const global_size_t numElements,
2463  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2464 {
2465  typedef LocalOrdinal LO;
2466  typedef GlobalOrdinal GO;
2467  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2468  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2469 }
2470 
2471 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2472 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2473 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2474  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2475 )
2476 {
2477  using Teuchos::rcp;
2479  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2480 
2481  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2482 }
2483 
2484 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2485 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2486 Tpetra::createLocalMapWithNode (const size_t numElements,
2487  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2488 )
2489 {
2490  using Tpetra::global_size_t;
2491  using Teuchos::rcp;
2493  const GlobalOrdinal indexBase = 0;
2494  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2495 
2496  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2497 }
2498 
2499 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2500 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2502  const size_t localNumElements,
2503  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2504 )
2505 {
2506  using Teuchos::rcp;
2508  const GlobalOrdinal indexBase = 0;
2509 
2510  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2511 }
2512 
2513 template <class LocalOrdinal, class GlobalOrdinal>
2514 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2516  const size_t localNumElements,
2517  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2518 {
2519  typedef LocalOrdinal LO;
2520  typedef GlobalOrdinal GO;
2521  using NT = typename Tpetra::Map<LO, GO>::node_type;
2522 
2523  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2524 }
2525 
2526 template <class LocalOrdinal, class GlobalOrdinal>
2527 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2528 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2529  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2530 {
2531  typedef LocalOrdinal LO;
2532  typedef GlobalOrdinal GO;
2533  using NT = typename Tpetra::Map<LO, GO>::node_type;
2534 
2535  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2536 }
2537 
2538 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2539 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2540 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2541  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2542 )
2543 {
2544  using Teuchos::rcp;
2546  using GST = Tpetra::global_size_t;
2547  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2548  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2549  // shouldn't be zero, given that the index base is supposed to equal
2550  // the globally min global index?
2551  const GlobalOrdinal indexBase = 0;
2552 
2553  return rcp (new map_type (INV, elementList, indexBase, comm));
2554 }
2555 
2556 template<class LO, class GO, class NT>
2557 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2558 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2559 {
2561  using Teuchos::Array;
2562  using Teuchos::ArrayView;
2563  using Teuchos::as;
2564  using Teuchos::rcp;
2565  using std::cerr;
2566  using std::endl;
2567  using map_type = Tpetra::Map<LO, GO, NT>;
2568  using GST = global_size_t;
2569 
2570  const bool verbose = Details::Behavior::verbose("Map");
2571  std::unique_ptr<std::string> prefix;
2572  if (verbose) {
2573  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2574  prefix = Details::createPrefix(
2575  comm.getRawPtr(), "createOneToOne(Map)");
2576  std::ostringstream os;
2577  os << *prefix << "Start" << endl;
2578  cerr << os.str();
2579  }
2580  const size_t maxNumToPrint = verbose ?
2581  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2582  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2583  const int myRank = M->getComm ()->getRank ();
2584 
2585  // Bypasses for special cases where either M is known to be
2586  // one-to-one, or the one-to-one version of M is easy to compute.
2587  // This is why we take M as an RCP, not as a const reference -- so
2588  // that we can return M itself if it is 1-to-1.
2589  if (! M->isDistributed ()) {
2590  // For a locally replicated Map, we assume that users want to push
2591  // all the GIDs to Process 0.
2592 
2593  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2594  // you think it should return, in this special case of a locally
2595  // replicated contiguous Map.
2596  const GST numGlobalEntries = M->getGlobalNumElements ();
2597  if (M->isContiguous()) {
2598  const size_t numLocalEntries =
2599  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2600  if (verbose) {
2601  std::ostringstream os;
2602  os << *prefix << "Input is locally replicated & contiguous; "
2603  "numLocalEntries=" << numLocalEntries << endl;
2604  cerr << os.str ();
2605  }
2606  auto retMap =
2607  rcp(new map_type(numGlobalEntries, numLocalEntries,
2608  M->getIndexBase(), M->getComm()));
2609  if (verbose) {
2610  std::ostringstream os;
2611  os << *prefix << "Done" << endl;
2612  cerr << os.str ();
2613  }
2614  return retMap;
2615  }
2616  else {
2617  if (verbose) {
2618  std::ostringstream os;
2619  os << *prefix << "Input is locally replicated & noncontiguous"
2620  << endl;
2621  cerr << os.str ();
2622  }
2623  ArrayView<const GO> myGids =
2624  (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2625  auto retMap =
2626  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2627  M->getComm()));
2628  if (verbose) {
2629  std::ostringstream os;
2630  os << *prefix << "Done" << endl;
2631  cerr << os.str ();
2632  }
2633  return retMap;
2634  }
2635  }
2636  else if (M->isContiguous ()) {
2637  if (verbose) {
2638  std::ostringstream os;
2639  os << *prefix << "Input is distributed & contiguous" << endl;
2640  cerr << os.str ();
2641  }
2642  // Contiguous, distributed Maps are one-to-one by construction.
2643  // (Locally replicated Maps can be contiguous.)
2644  return M;
2645  }
2646  else {
2647  if (verbose) {
2648  std::ostringstream os;
2649  os << *prefix << "Input is distributed & noncontiguous" << endl;
2650  cerr << os.str ();
2651  }
2653  const size_t numMyElems = M->getLocalNumElements ();
2654  ArrayView<const GO> myElems = M->getLocalElementList ();
2655  Array<int> owner_procs_vec (numMyElems);
2656 
2657  if (verbose) {
2658  std::ostringstream os;
2659  os << *prefix << "Call Directory::getDirectoryEntries: ";
2660  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2661  os << endl;
2662  cerr << os.str();
2663  }
2664  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2665  if (verbose) {
2666  std::ostringstream os;
2667  os << *prefix << "getDirectoryEntries result: ";
2668  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2669  os << endl;
2670  cerr << os.str();
2671  }
2672 
2673  Array<GO> myOwned_vec (numMyElems);
2674  size_t numMyOwnedElems = 0;
2675  for (size_t i = 0; i < numMyElems; ++i) {
2676  const GO GID = myElems[i];
2677  const int owner = owner_procs_vec[i];
2678 
2679  if (myRank == owner) {
2680  myOwned_vec[numMyOwnedElems++] = GID;
2681  }
2682  }
2683  myOwned_vec.resize (numMyOwnedElems);
2684 
2685  if (verbose) {
2686  std::ostringstream os;
2687  os << *prefix << "Create Map: ";
2688  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2689  os << endl;
2690  cerr << os.str();
2691  }
2692  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2693  M->getIndexBase(), M->getComm()));
2694  if (verbose) {
2695  std::ostringstream os;
2696  os << *prefix << "Done" << endl;
2697  cerr << os.str();
2698  }
2699  return retMap;
2700  }
2701 }
2702 
2703 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2704 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2707 {
2708  using Details::Behavior;
2710  using Teuchos::Array;
2711  using Teuchos::ArrayView;
2712  using Teuchos::RCP;
2713  using Teuchos::rcp;
2714  using Teuchos::toString;
2715  using std::cerr;
2716  using std::endl;
2717  using LO = LocalOrdinal;
2718  using GO = GlobalOrdinal;
2719  using map_type = Tpetra::Map<LO, GO, Node>;
2720 
2721  const bool verbose = Behavior::verbose("Map");
2722  std::unique_ptr<std::string> prefix;
2723  if (verbose) {
2724  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2725  prefix = Details::createPrefix(
2726  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2727  std::ostringstream os;
2728  os << *prefix << "Start" << endl;
2729  cerr << os.str();
2730  }
2731  const size_t maxNumToPrint = verbose ?
2732  Behavior::verbosePrintCountThreshold() : size_t(0);
2733 
2734  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2735  // Maps (which are 1-to-1 by construction).
2736 
2738  if (verbose) {
2739  std::ostringstream os;
2740  os << *prefix << "Initialize Directory" << endl;
2741  cerr << os.str ();
2742  }
2743  directory.initialize (*M, tie_break);
2744  if (verbose) {
2745  std::ostringstream os;
2746  os << *prefix << "Done initializing Directory" << endl;
2747  cerr << os.str ();
2748  }
2749  size_t numMyElems = M->getLocalNumElements ();
2750  ArrayView<const GO> myElems = M->getLocalElementList ();
2751  Array<int> owner_procs_vec (numMyElems);
2752  if (verbose) {
2753  std::ostringstream os;
2754  os << *prefix << "Call Directory::getDirectoryEntries: ";
2755  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2756  os << endl;
2757  cerr << os.str();
2758  }
2759  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2760  if (verbose) {
2761  std::ostringstream os;
2762  os << *prefix << "getDirectoryEntries result: ";
2763  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2764  os << endl;
2765  cerr << os.str();
2766  }
2767 
2768  const int myRank = M->getComm()->getRank();
2769  Array<GO> myOwned_vec (numMyElems);
2770  size_t numMyOwnedElems = 0;
2771  for (size_t i = 0; i < numMyElems; ++i) {
2772  const GO GID = myElems[i];
2773  const int owner = owner_procs_vec[i];
2774  if (myRank == owner) {
2775  myOwned_vec[numMyOwnedElems++] = GID;
2776  }
2777  }
2778  myOwned_vec.resize (numMyOwnedElems);
2779 
2780  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2781  // valid for the new Map. Why can't we reuse it?
2782  const global_size_t GINV =
2783  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2784  if (verbose) {
2785  std::ostringstream os;
2786  os << *prefix << "Create Map: ";
2787  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2788  os << endl;
2789  cerr << os.str();
2790  }
2791  RCP<const map_type> retMap
2792  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2793  M->getComm ()));
2794  if (verbose) {
2795  std::ostringstream os;
2796  os << *prefix << "Done" << endl;
2797  cerr << os.str();
2798  }
2799  return retMap;
2800 }
2801 
2802 //
2803 // Explicit instantiation macro
2804 //
2805 // Must be expanded from within the Tpetra namespace!
2806 //
2807 
2809 
2810 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2811  \
2812  template class Map< LO , GO , NODE >; \
2813  \
2814  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2815  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2816  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2817  \
2818  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2819  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2820  const size_t localNumElements, \
2821  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2822  \
2823  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2824  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2825  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2826  \
2827  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2828  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2829  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2830  \
2831  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2832  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2833  \
2834  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2835  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2836  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2837 
2838 
2840 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2841  template Teuchos::RCP< const Map<LO,GO> > \
2842  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2843  \
2844  template Teuchos::RCP< const Map<LO,GO> > \
2845  createContigMap<LO,GO>( global_size_t, size_t, \
2846  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2847  \
2848  template Teuchos::RCP< const Map<LO,GO> > \
2849  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2850  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2851  \
2852  template Teuchos::RCP< const Map<LO,GO> > \
2853  createUniformContigMap<LO,GO>( const global_size_t, \
2854  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2855 
2856 #endif // TPETRA_MAP_DEF_HPP
Interface for breaking ties in ownership.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:64
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
LocalOrdinal local_ordinal_type
The type of local indices.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
GlobalOrdinal global_ordinal_type
The type of global indices.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
Declaration of Tpetra::Details::printOnce.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declaration of a function that prints strings from each process.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map&#39;s device.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map&#39;s communicator with a subset communicator.
Functions for initializing and finalizing Tpetra.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
static bool verbose()
Whether Tpetra is in verbose mode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
typename device_type::execution_space execution_space
The Kokkos execution space.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
A parallel distribution of indices over processes.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
bool isOneToOne() const
Whether the Map is one to one.
void initialize(const map_type &map)
Initialize the Directory with its Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
typename Node::device_type device_type
This class&#39; Kokkos::Device specialization.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
LocalGlobal
Enum for local versus global allocation of Map entries.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
Node node_type
Legacy typedef that will go away at some point.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Description of Tpetra&#39;s behavior.
std::string description() const
Implementation of Teuchos::Describable.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.
Map()
Default constructor (that does nothing).