Tpetra_Details_makeColMap_def.hpp
#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP

// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#include "Tpetra_RowGraph.hpp"
#include "Tpetra_Util.hpp"
#include "Teuchos_Array.hpp"
#include "Kokkos_Bitset.hpp"
#include <set>
#include <vector>

namespace Tpetra {
namespace Details {

template <class LO, class GO, class NT>
int makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
                   Teuchos::Array<int>& remotePIDs,
                   const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
                   size_t numLocalColGIDs,
                   size_t numRemoteColGIDs,
                   std::set<GO>& RemoteGIDSet,
                   std::vector<GO>& RemoteGIDUnorderedVector,
                   std::vector<bool>& GIDisLocal,
                   const bool sortEachProcsGids,
                   std::ostream* errStrm) {
  using std::endl;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::rcp;
  int errCode = 0;
  const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
  typedef ::Tpetra::Map<LO, GO, NT> map_type;
  // Possible short circuit for the serial scenario:
  //
  // If all domain GIDs are present as column indices, then set
  // ColMap=DomainMap. By construction, LocalGIDs is a subset of
  // DomainGIDs.
  //
  // If we have
  //   * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
  // and
  //   * Number of local GIDs is the number of domain GIDs,
  // then
  //   * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
  //     size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
  // on the calling process.
  //
  // We concern ourselves only with the special case of a serial
  // DomainMap, which obviates the need for communication.
  //
  // If
  //   * DomainMap has a serial communicator,
  // then we can set the column Map to the domain Map and return.
  // Benefit: this graph won't need an Import object later.
  //
  // Note that for a serial domain Map, there can be no remote GIDs,
  // because there are no remote processes. Likely explanations for
  // remote GIDs in this case are:
  //   * The user submitted erroneous column indices.
  //   * The user submitted an erroneous domain Map.
  if (domMap->getComm()->getSize() == 1) {
    if (numRemoteColGIDs != 0) {
      errCode = -2;
      if (errStrm != NULL) {
        *errStrm << prefix << "The domain Map only has one process, but "
                 << numRemoteColGIDs << " column "
                 << (numRemoteColGIDs != 1 ? "indices are" : "index is")
                 << " not in the domain Map. Either these indices are "
                    "invalid or the domain Map is invalid. Remember that nonsquare "
                    "matrices, or matrices where the row and range Maps differ, "
                    "require calling the version of fillComplete that takes the "
                    "domain and range Maps as input."
                 << endl;
      }
    }
    if (numLocalColGIDs == domMap->getLocalNumElements()) {
      colMap = domMap;  // shallow copy
      return errCode;
    }
  }

  // Populate myColumns with a list of all column GIDs. Put
  // locally owned (in the domain Map) GIDs at the front: they
  // correspond to "same" and "permuted" entries between the
  // column Map and the domain Map. Put remote GIDs at the back.
  Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
  // Get views into myColumns for each part.
  ArrayView<GO> LocalColGIDs = myColumns(0, numLocalColGIDs);
  ArrayView<GO> remoteColGIDs = myColumns(numLocalColGIDs, numRemoteColGIDs);

  // Copy the remote GIDs into myColumns.
  if (sortEachProcsGids) {
    // The std::set puts GIDs in increasing order.
    std::copy(RemoteGIDSet.begin(), RemoteGIDSet.end(),
              remoteColGIDs.begin());
  } else {
    // Respect the originally encountered order.
    std::copy(RemoteGIDUnorderedVector.begin(),
              RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
  }
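  // For example (hypothetical values): if remote GIDs were first
  // encountered in the order {42, 7, 19}, the sorted branch stores
  // {7, 19, 42}, while the unsorted branch keeps {42, 7, 19}.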

  // Make a list of process ranks corresponding to the remote GIDs.
  // remotePIDs is an output argument of getRemoteIndexList below;
  // its initial contents don't matter.
  if (static_cast<size_t>(remotePIDs.size()) != numRemoteColGIDs) {
    remotePIDs.resize(numRemoteColGIDs);
  }
  // Look up the remote processes' ranks in the domain Map.
  {
    const LookupStatus stat =
        domMap->getRemoteIndexList(remoteColGIDs, remotePIDs());

    // If any process returns IDNotPresent, then at least one of
    // the remote indices was not present in the domain Map. This
    // means that the Import object cannot be constructed, because
    // of incongruity between the column Map and domain Map.
    // This has two likely causes:
    //   - The user has made a mistake in the column indices
    //   - The user has made a mistake with respect to the domain Map
    if (stat == IDNotPresent) {
      if (errStrm != NULL) {
        *errStrm << prefix << "Some column indices are not in the domain Map. "
                    "Either these column indices are invalid or the domain Map is "
                    "invalid. Likely cause: For a nonsquare matrix, you must give the "
                    "domain and range Maps as input to fillComplete."
                 << endl;
      }
      // Don't return yet, because not all processes may have
      // encountered this error state. This function ends with an
      // all-reduce, so we have to make sure that everybody gets to
      // that point. The resulting Map may be wrong, but at least
      // nothing should crash.
      errCode = -3;
    }
  }

  // Sort incoming remote column indices by their owning process
  // rank, so that all columns coming from a given remote process
  // are contiguous. This means the Import's Distributor doesn't
  // need to reorder data.
  //
  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
  // it respects either of the possible orderings of GIDs (sorted,
  // or original order) specified above.
  sort2(remotePIDs.begin(), remotePIDs.end(), remoteColGIDs.begin(), true);
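  // For example (hypothetical values): remotePIDs = {2, 0, 2, 0} and
  // remoteColGIDs = {11, 5, 9, 3} become {0, 0, 2, 2} and
  // {5, 3, 11, 9}: GIDs are grouped by owning process, and the
  // stable sort preserves their relative order within each process.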

  // Copy the local GIDs into myColumns. Two cases:
  // 1. If the number of local column GIDs is the same as the number
  //    of local domain GIDs, we can simply read the domain GIDs
  //    into the front part of ColIndices (see logic above from the
  //    serial short-circuit case).
  // 2. We step through the GIDs of the DomainMap, checking to see
  //    if each domain GID is a column GID. We want to do this to
  //    maintain a consistent ordering of GIDs between the columns
  //    and the domain.

  const size_t numDomainElts = domMap->getLocalNumElements();
  if (numLocalColGIDs == numDomainElts) {
    // If the number of locally owned GIDs is the same as the
    // number of local domain Map elements, then the local domain
    // Map elements are the same as the locally owned GIDs.
    if (domMap->isContiguous()) {
      // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
      // the domain Map is contiguous, it's more efficient to avoid
      // calling getLocalElementList(), since that permanently
      // constructs and caches the GID list in the contiguous Map.
      GO curColMapGid = domMap->getMinGlobalIndex();
      for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
        LocalColGIDs[k] = curColMapGid;
      }
    } else {
      ArrayView<const GO> domainElts = domMap->getLocalElementList();
      std::copy(domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
    }
  } else {
    // Count the number of locally owned GIDs, both to keep track of
    // the current array index, and as a sanity check.
    size_t numLocalCount = 0;
    if (domMap->isContiguous()) {
      // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
      // the domain Map is contiguous, it's more efficient to avoid
      // calling getLocalElementList(), since that permanently
      // constructs and caches the GID list in the contiguous Map.
      GO curColMapGid = domMap->getMinGlobalIndex();
      for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
        if (GIDisLocal[i]) {
          LocalColGIDs[numLocalCount++] = curColMapGid;
        }
      }
    } else {
      ArrayView<const GO> domainElts = domMap->getLocalElementList();
      for (size_t i = 0; i < numDomainElts; ++i) {
        if (GIDisLocal[i]) {
          LocalColGIDs[numLocalCount++] = domainElts[i];
        }
      }
    }
    if (numLocalCount != numLocalColGIDs) {
      if (errStrm != NULL) {
        *errStrm << prefix << "numLocalCount = " << numLocalCount
                 << " != numLocalColGIDs = " << numLocalColGIDs
                 << ". This should never happen. "
                    "Please report this bug to the Tpetra developers."
                 << endl;
      }
      // Don't return yet, because not all processes may have
      // encountered this error state. This function ends with an
      // all-reduce, so we have to make sure that everybody gets to
      // that point.
      errCode = -4;
    }
  }
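  // For example (hypothetical values): if the domain Map owns GIDs
  // {10, 11, 12, 13} and only 10 and 12 appear as column indices,
  // then GIDisLocal = {true, false, true, false} and the loops above
  // produce LocalColGIDs = {10, 12}, preserving domain Map order.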

  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
  // information we collected above to construct the Import. In
  // particular, building an Import requires:
  //
  // 1. numSameIDs (length of initial contiguous sequence of GIDs
  //    on this process that are the same in both Maps; this
  //    equals the number of domain Map elements on this process)
  //
  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
  //    case, since there's no permutation going on; the column
  //    Map starts with the domain Map's GIDs, and immediately
  //    after them come the remote GIDs)
  //
  // 3. remoteGIDs (exactly those GIDs that we found out above
  //    were not in the domain Map) and remoteLIDs (which we could
  //    have gotten above by using the three-argument version of
  //    getRemoteIndexList() that computes local indices as well
  //    as process ranks, instead of the two-argument version that
  //    was used above)
  //
  // 4. remotePIDs (which we have from the getRemoteIndexList() call
  //    above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
  //    Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
  //    addresses this. remotePIDs is now an output argument of
  //    both this function and CrsGraph::makeColMap, and
  //    CrsGraph::makeImportExport now has an option to use that
  //    information, if available (that is, if makeColMap was
  //    actually called, which it would not be if the graph already
  //    had a column Map).
  //
  // 5. Apply the permutation from sorting remotePIDs to both
  //    remoteGIDs and remoteLIDs (by calling sort3 above instead of
  //    sort2), instead of just to remoteLIDs alone.
  //
  // 6. Everything after the sort3 call in Import::setupExport():
  //    a. Create the Distributor via createFromRecvs(), which
  //       computes exportGIDs and exportPIDs
  //    b. Compute exportLIDs from exportGIDs (by asking the
  //       source Map, in this case the domain Map, to convert
  //       global to local)
  //
  // Steps 1-5 come for free, since we must do that work anyway in
  // order to compute the column Map. In particular, Step 3 is
  // even more expensive than Step 6a, since it involves both
  // creating and using a new Distributor object.
  const global_size_t INV =
      Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
  // be the same as the Map's min GID? If the first column is empty
  // (contains no entries), then the column Map's min GID won't
  // necessarily be the same as the domain Map's index base.
  const GO indexBase = domMap->getIndexBase();
  colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
  return errCode;
}

template <class LO, class GO, class NT>
int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
               Teuchos::Array<int>& remotePIDs,
               const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
               const RowGraph<LO, GO, NT>& graph,
               const bool sortEachProcsGids,
               std::ostream* errStrm) {
  using std::endl;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::rcp;
  typedef ::Tpetra::Map<LO, GO, NT> map_type;
  const char prefix[] = "Tpetra::Details::makeColMap: ";
  int errCode = 0;

  // If the input domain Map or its communicator is null on the
  // calling process, then the calling process does not participate in
  // the returned column Map. Thus, we can set the returned column
  // Map to null on those processes, and return immediately. This is
  // not an error condition, as long as when domMap and its
  // communicator are NOT null, the graph's other Maps and
  // communicator are not also null.
  if (domMap.is_null() || domMap->getComm().is_null()) {
    colMap = Teuchos::null;
    return errCode;
  }

  Array<GO> myColumns;
  if (graph.isLocallyIndexed()) {
    colMap = graph.getColMap();
    // If the graph is locally indexed, it had better have a column Map.
    // The extra check for ! graph.hasColMap() is conservative.
    if (colMap.is_null() || !graph.hasColMap()) {
      errCode = -1;
      if (errStrm != NULL) {
        *errStrm << prefix << "The graph is locally indexed on the calling "
                    "process, but has no column Map (either getColMap() returns null, "
                    "or hasColMap() returns false)."
                 << endl;
      }
      // Under this error condition, this process will not fill
      // myColumns. The resulting new column Map will be incorrect,
      // but at least we won't hang, and this process will report the
      // error.
    } else {
      // The graph already has a column Map, and is locally indexed on
      // the calling process. However, it may be globally indexed (or
      // neither locally nor globally indexed) on other processes.
      // Assume that we want to recreate the column Map.
      if (colMap->isContiguous()) {
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO>(colMap->getLocalNumElements());
        myColumns.resize(numCurGids);
        const GO myFirstGblInd = colMap->getMinGlobalIndex();
        for (LO k = 0; k < numCurGids; ++k) {
          myColumns[k] = myFirstGblInd + static_cast<GO>(k);
        }
      } else {  // the column Map is NOT contiguous
        ArrayView<const GO> curGids = graph.getColMap()->getLocalElementList();
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO>(curGids.size());
        myColumns.resize(numCurGids);
        for (LO k = 0; k < numCurGids; ++k) {
          myColumns[k] = curGids[k];
        }
      }  // whether the graph's current column Map is contiguous
    }  // does the graph currently have a column Map?
  } else if (graph.isGloballyIndexed()) {
    // Go through all the rows, finding the populated column indices.
    //
    // Our final list of indices for the column Map constructor will
    // have the following properties (all of which are with respect to
    // the calling process):
    //
    // 1. Indices in the domain Map go first.
    // 2. Indices not in the domain Map follow, grouped contiguously
    //    by their owning process rank (in the domain Map), and in
    //    increasing order within each process.
    // 3. No duplicate indices.
    //
    // This imitates the ordering used by Aztec(OO) and Epetra.
    // Storing indices owned by the same process (in the domain Map)
    // contiguously permits the use of contiguous send and receive
    // buffers.
    //
    // We begin by partitioning the column indices into "local" GIDs
    // (owned by the domain Map) and "remote" GIDs (not owned by the
    // domain Map). We use the same order for local GIDs as the
    // domain Map, so we track them in place in their array. We use
    // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
    // that we don't have to merge duplicates later.
    const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid();
    size_t numLocalColGIDs = 0;
    size_t numRemoteColGIDs = 0;

    // GIDisLocal[lid] is false if and only if local index lid in the
    // domain Map is remote (not local).
    std::vector<bool> GIDisLocal(domMap->getLocalNumElements(), false);
    std::set<GO> RemoteGIDSet;
    // This preserves the not-sorted Epetra order of GIDs.
    // We only use this if sortEachProcsGids is false.
    std::vector<GO> RemoteGIDUnorderedVector;

    if (!graph.getRowMap().is_null()) {
      const Tpetra::Map<LO, GO, NT>& rowMap = *(graph.getRowMap());
      const LO lclNumRows = rowMap.getLocalNumElements();
      for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
        const GO gblRow = rowMap.getGlobalElement(lclRow);
        typename RowGraph<LO, GO, NT>::global_inds_host_view_type rowGids;
        graph.getGlobalRowView(gblRow, rowGids);

        const LO numEnt = static_cast<LO>(rowGids.size());
        if (numEnt != 0) {
          for (LO k = 0; k < numEnt; ++k) {
            const GO gid = rowGids[k];
            const LO lid = domMap->getLocalElement(gid);
            if (lid != LINV) {
              const bool alreadyFound = GIDisLocal[lid];
              if (!alreadyFound) {
                GIDisLocal[lid] = true;
                ++numLocalColGIDs;
              }
            } else {
              const bool notAlreadyFound = RemoteGIDSet.insert(gid).second;
              if (notAlreadyFound) {  // gid did not exist in the set before
                if (!sortEachProcsGids) {
                  // The user doesn't want to sort remote GIDs (for each
                  // remote process); they want us to keep remote GIDs
                  // in their original order. We do this by stuffing
                  // each remote GID into an array as we encounter it
                  // for the first time. The std::set helpfully tracks
                  // first encounters.
                  RemoteGIDUnorderedVector.push_back(gid);
                }
                ++numRemoteColGIDs;
              }
            }
          }  // for each entry k in row r
        }  // if row r contains > 0 entries
      }  // for each locally owned row r
    }  // if the graph has a nonnull row Map

    return makeColMapImpl<LO, GO, NT>(
        colMap, remotePIDs,
        domMap,
        numLocalColGIDs, numRemoteColGIDs,
        RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
        sortEachProcsGids, errStrm);

  }  // if the graph is globally indexed
  else {
    // If we reach this point, the graph is neither locally nor
    // globally indexed. Thus, the graph is empty on this process
    // (per the usual legacy Petra convention), so myColumns will be
    // left empty.
  }

  const global_size_t INV =
      Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
  // be the same as the Map's min GID? If the first column is empty
  // (contains no entries), then the column Map's min GID won't
  // necessarily be the same as the domain Map's index base.
  const GO indexBase = domMap->getIndexBase();
  colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
  return errCode;
}
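
// Example: a minimal usage sketch for the graph-based overload (not
// part of Tpetra; the name 'graph' and the use of getDomainMap()
// here are illustrative assumptions):
//
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   Teuchos::Array<int> remotePIDs;
//   const int errCode =
//       makeColMap(colMap, remotePIDs, graph.getDomainMap(), graph,
//                  true, &std::cerr);
//
// On success errCode is 0, and colMap lists the domain-Map-owned
// GIDs first, followed by remote GIDs grouped by owning process.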

template <typename GOView, typename bitset_t>
struct GatherPresentEntries {
  using GO = typename GOView::non_const_value_type;

  GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
    : minGID(minGID_)
    , gids(gids_)
    , present(present_) {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i) const {
    present.set(gids(i) - minGID);
  }

  GO minGID;
  GOView gids;
  bitset_t present;
};
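
// For example (hypothetical values): with minGID = 100 and
// gids = {100, 103, 100, 101}, this functor sets bits {0, 1, 3} of
// 'present'; the duplicate 100 just sets bit 0 twice. The bitset
// thus records the distinct GIDs in [minGID, maxGID] that occur.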

template <typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
struct ListGIDs {
  using mem_space = typename device_t::memory_space;
  using GOView = Kokkos::View<GO*, mem_space>;
  using SingleView = Kokkos::View<GO, mem_space>;

  ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
    : minGID(minGID_)
    , gidList(gidList_)
    , numElems(numElems_)
    , present(present_)
    , localDomainMap(localDomainMap_) {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const {
    const bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
    if (present.test(i) && doingRemotes == isRemote) {
      if (finalPass) {
        // lcount is the index where this GID should be inserted in gidList.
        gidList(lcount) = minGID + i;
      }
      lcount++;
    }
    if ((i == static_cast<GO>(present.size() - 1)) && finalPass) {
      // Set the number of inserted indices in a single-element view.
      numElems() = lcount;
    }
  }

  GO minGID;
  GOView gidList;
  SingleView numElems;
  const_bitset_t present;
  const LocalMapType localDomainMap;
};
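
// For example (hypothetical values): if bits {0, 2, 5} of 'present'
// are set, minGID = 10, and all three GIDs are remote in the domain
// Map, then the doingRemotes = true instantiation of this scan writes
// gidList = {10, 12, 15} and numElems() = 3. Because the scan visits
// the bitset in increasing order, each output list comes out sorted
// and duplicate-free.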

template <typename GO, typename mem_space>
struct MinMaxReduceFunctor {
  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
  using GOView = Kokkos::View<GO*, mem_space>;

  MinMaxReduceFunctor(const GOView& gids_)
    : gids(gids_) {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const {
    const GO gid = gids(i);
    if (gid < lminmax.min_val)
      lminmax.min_val = gid;
    if (gid > lminmax.max_val)
      lminmax.max_val = gid;
  }

  const GOView gids;
};

template <class LO, class GO, class NT>
int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
               const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
               Kokkos::View<GO*, typename NT::memory_space> gids,
               std::ostream* errStrm) {
  using Kokkos::RangePolicy;
  using Teuchos::Array;
  using Teuchos::RCP;
  using device_t = typename NT::device_type;
  using exec_space = typename device_t::execution_space;
  using memory_space = typename device_t::memory_space;
  // Note BMK 5-2021: this is deliberately not just device_t.
  // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
  // use the default memory space for HIP (HIPSpace). Using the default mem
  // space is fine for all other backends too. This bitset type is only used
  // in this function, so it won't cause type mismatches.
  using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
  using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
  using GOView = Kokkos::View<GO*, memory_space>;
  using SingleView = Kokkos::View<GO, memory_space>;
  using map_type = Tpetra::Map<LO, GO, NT>;
  using LocalMap = typename map_type::local_map_type;
  GO nentries = gids.extent(0);
  GO minGID = Teuchos::OrdinalTraits<GO>::max();
  GO maxGID = 0;
  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
  MinMaxValue minMaxGID = {minGID, maxGID};
  Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
                          MinMaxReduceFunctor<GO, memory_space>(gids),
                          Kokkos::MinMax<GO>(minMaxGID));
  minGID = minMaxGID.min_val;
  maxGID = minMaxGID.max_val;
  // Now we know the full range of input GIDs. Determine which GIDs
  // are present using a dense bitset over the range [minGID, maxGID].
  bitset_t presentGIDs(maxGID - minGID + 1);
  Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries), GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
  const_bitset_t constPresentGIDs(presentGIDs);
  // Get the set of local and remote GIDs on device.
  SingleView numLocals("Num local GIDs");
  SingleView numRemotes("Num remote GIDs");
  GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
  GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
  LocalMap localDomMap = domMap->getLocalMap();
  // This lists the locally owned GIDs in localGIDView...
  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
                        ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>(minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
  // ...and this lists the remote GIDs in remoteGIDView.
  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
                        ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>(minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
  // Pull down the counts.
  GO numLocalColGIDs = 0;
  GO numRemoteColGIDs = 0;
  // DEEP_COPY REVIEW - DEVICE-TO-VALUE
  Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
  // DEEP_COPY REVIEW - DEVICE-TO-VALUE
  Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
  // Pull down the local and remote GID lists.
  auto localsHost = Kokkos::create_mirror_view(localGIDView);
  auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
  // CAG: This fence was found to be required on Cuda with UVM=on.
  Kokkos::fence("Tpetra::makeColMap");
  // Finally, populate the STL structures that hold the index lists.
  std::set<GO> RemoteGIDSet;
  std::vector<GO> RemoteGIDUnorderedVector;
  std::vector<bool> GIDisLocal(domMap->getLocalNumElements(), false);
  for (GO i = 0; i < numLocalColGIDs; i++) {
    const GO gid = localsHost(i);
    // We already know that gid is locally owned, so this index will
    // never be invalid(). makeColMapImpl uses this and the domain
    // Map to get the local GID list.
    GIDisLocal[domMap->getLocalElement(gid)] = true;
  }
  RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
  for (GO i = 0; i < numRemoteColGIDs; i++) {
    const GO gid = remotesHost(i);
    RemoteGIDSet.insert(gid);
    RemoteGIDUnorderedVector.push_back(gid);
  }
  // remotePIDs will be discarded in this version of makeColMap.
  Array<int> remotePIDs;
  // Hand off to makeColMapImpl to build the column Map itself.
  return makeColMapImpl<LO, GO, NT>(
      colMap,
      remotePIDs,
      domMap,
      static_cast<size_t>(numLocalColGIDs),
      static_cast<size_t>(numRemoteColGIDs),
      RemoteGIDSet,
      RemoteGIDUnorderedVector,
      GIDisLocal,
      true,  // always sort remotes
      errStrm);
}
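
// Example: a minimal usage sketch for the device overload (not part
// of Tpetra; the view name 'myGids' is an illustrative assumption).
// Given a device view holding every column GID referenced on this
// process:
//
//   Kokkos::View<GO*, typename NT::memory_space> myGids = ...;
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   const int errCode =
//       makeColMap(colMap, domMap, myGids, &std::cerr);
//
// Unlike the graph-based overload, this version always sorts remote
// GIDs within each owning process, and it discards the remote
// process ranks that makeColMapImpl computes.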

}  // namespace Details
}  // namespace Tpetra

//
// Explicit instantiation macros
//
// Must be expanded from within the Tpetra namespace!
//
#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO, GO, NT)            \
  namespace Details {                                            \
  template int                                                   \
  makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&,       \
             Teuchos::Array<int>&,                               \
             const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             const RowGraph<LO, GO, NT>&,                        \
             const bool,                                         \
             std::ostream*);                                     \
  template int                                                   \
  makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&,       \
             const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             Kokkos::View<GO*, typename NT::memory_space>,       \
             std::ostream*);                                     \
  }

#endif  // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP