Tpetra_Details_makeColMap_def.hpp
#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP

// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#include "Tpetra_RowGraph.hpp"
#include "Tpetra_Util.hpp"
#include "Teuchos_Array.hpp"
#include "Kokkos_Bitset.hpp"
#include <set>
#include <vector>

namespace Tpetra {
namespace Details {

template <class LO, class GO, class NT>
int
makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
               Teuchos::Array<int>& remotePIDs,
               const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
               size_t numLocalColGIDs,
               size_t numRemoteColGIDs,
               std::set<GO>& RemoteGIDSet,
               std::vector<GO>& RemoteGIDUnorderedVector,
               std::vector<bool>& GIDisLocal,
               const bool sortEachProcsGids,
               std::ostream* errStrm)
{
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::rcp;
  using std::endl;
  int errCode = 0;
  const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
  typedef ::Tpetra::Map<LO, GO, NT> map_type;
  // Possible short-circuit for the serial scenario:
  //
  // If all domain GIDs are present as column indices, then set
  // ColMap=DomainMap.  By construction, LocalGIDs is a subset of
  // DomainGIDs.
  //
  // If we have
  //   * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
  // and
  //   * Number of local GIDs is number of domain GIDs
  // then
  //   * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
  //     size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
  // on the calling process.
  //
  // We will concern ourselves only with the special case of a
  // serial DomainMap, obviating the need for communication.
  //
  // If
  //   * DomainMap has a serial communicator
  // then we can set the column Map to the domain Map, and return.
  // Benefit: this graph won't need an Import object later.
  //
  // Note that for a serial domain Map, there can be no remote GIDs,
  // because there are no remote processes.  Likely explanations for
  // remote GIDs in this case are:
  //   * The user submitted erroneous column indices
  //   * The user submitted an erroneous domain Map
  if (domMap->getComm ()->getSize () == 1) {
    if (numRemoteColGIDs != 0) {
      errCode = -2;
      if (errStrm != NULL) {
        *errStrm << prefix << "The domain Map only has one process, but "
          << numRemoteColGIDs << " column "
          << (numRemoteColGIDs != 1 ? "indices are" : "index is")
          << " not in the domain Map.  Either these indices are "
          "invalid or the domain Map is invalid.  Remember that nonsquare "
          "matrices, or matrices where the row and range Maps differ, "
          "require calling the version of fillComplete that takes the "
          "domain and range Maps as input." << endl;
      }
    }
    if (numLocalColGIDs == domMap->getLocalNumElements ()) {
      colMap = domMap; // shallow copy
      return errCode;
    }
  }
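
  // Illustrative (hypothetical) example of the serial short-circuit
  // above: with a one-process communicator, domain Map GIDs {0, 1, 2},
  // and column GIDs {0, 1, 2}, we have numRemoteColGIDs == 0 and
  // numLocalColGIDs == 3 == getLocalNumElements(), so colMap simply
  // aliases domMap and no Import object is needed later.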

  // Populate myColumns with a list of all column GIDs.  Put
  // locally owned (in the domain Map) GIDs at the front: they
  // correspond to "same" and "permuted" entries between the
  // column Map and the domain Map.  Put remote GIDs at the back.
  Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
  // Get pointers into myColumns for each part.
  ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
  ArrayView<GO> remoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);

  // Copy the remote GIDs into myColumns.
  if (sortEachProcsGids) {
    // The std::set puts GIDs in increasing order.
    std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
               remoteColGIDs.begin());
  }
  else {
    // Respect the originally encountered order.
    std::copy (RemoteGIDUnorderedVector.begin(),
               RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
  }

  // Make a list of process ranks corresponding to the remote GIDs.
  // remotePIDs is an output argument of getRemoteIndexList below;
  // its initial contents don't matter.
  if (static_cast<size_t> (remotePIDs.size ()) != numRemoteColGIDs) {
    remotePIDs.resize (numRemoteColGIDs);
  }
  // Look up the remote processes' ranks in the domain Map.
  {
    const LookupStatus stat =
      domMap->getRemoteIndexList (remoteColGIDs, remotePIDs ());

    // If any process returns IDNotPresent, then at least one of
    // the remote indices was not present in the domain Map.  This
    // means that the Import object cannot be constructed, because
    // of incongruity between the column Map and domain Map.
    // This has two likely causes:
    //   - The user has made a mistake in the column indices
    //   - The user has made a mistake with respect to the domain Map
    if (stat == IDNotPresent) {
      if (errStrm != NULL) {
        *errStrm << prefix << "Some column indices are not in the domain Map. "
          "Either these column indices are invalid or the domain Map is "
          "invalid.  Likely cause: For a nonsquare matrix, you must give the "
          "domain and range Maps as input to fillComplete." << endl;
      }
      // Don't return yet, because not all processes may have
      // encountered this error state.  This function ends with an
      // all-reduce, so we have to make sure that everybody gets to
      // that point.  The resulting Map may be wrong, but at least
      // nothing should crash.
      errCode = -3;
    }
  }

  // Sort the incoming remote column indices by their owning process
  // rank, so that all columns coming from a given remote process
  // are contiguous.  This means the Import's Distributor doesn't
  // need to reorder data.
  //
  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
  // it respects either of the possible orderings of GIDs (sorted,
  // or original order) specified above.
  sort2 (remotePIDs.begin (), remotePIDs.end (), remoteColGIDs.begin (), true);
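
  // Illustrative (hypothetical) example of the stable sort-by-key
  // above: if remotePIDs = {2, 0, 2, 1} and remoteColGIDs = {9, 4, 7, 5},
  // then after sort2 with stableSort=true, remotePIDs = {0, 1, 2, 2} and
  // remoteColGIDs = {4, 5, 9, 7}.  Note that GIDs 9 and 7 keep their
  // original relative order within owning rank 2.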

  // Copy the local GIDs into myColumns.  Two cases:
  // 1. If the number of local column GIDs is the same as the number
  //    of local domain GIDs, we can simply read the domain GIDs
  //    into the front part of ColIndices (see the logic above in the
  //    serial short-circuit case).
  // 2. We step through the GIDs of the DomainMap, checking to see
  //    if each domain GID is a column GID.  We want to do this to
  //    maintain a consistent ordering of GIDs between the columns
  //    and the domain.

  const size_t numDomainElts = domMap->getLocalNumElements ();
  if (numLocalColGIDs == numDomainElts) {
    // If the number of locally owned GIDs is the same as the
    // number of local domain Map elements, then the local domain
    // Map elements are the same as the locally owned GIDs.
    if (domMap->isContiguous ()) {
      // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
      // the domain Map is contiguous, it's more efficient to avoid
      // calling getLocalElementList(), since that permanently
      // constructs and caches the GID list in the contiguous Map.
      GO curColMapGid = domMap->getMinGlobalIndex ();
      for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
        LocalColGIDs[k] = curColMapGid;
      }
    }
    else {
      ArrayView<const GO> domainElts = domMap->getLocalElementList ();
      std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
    }
  }
  else {
    // Count the number of locally owned GIDs, both to keep track of
    // the current array index, and as a sanity check.
    size_t numLocalCount = 0;
    if (domMap->isContiguous ()) {
      // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
      // the domain Map is contiguous, it's more efficient to avoid
      // calling getLocalElementList(), since that permanently
      // constructs and caches the GID list in the contiguous Map.
      GO curColMapGid = domMap->getMinGlobalIndex ();
      for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
        if (GIDisLocal[i]) {
          LocalColGIDs[numLocalCount++] = curColMapGid;
        }
      }
    }
    else {
      ArrayView<const GO> domainElts = domMap->getLocalElementList ();
      for (size_t i = 0; i < numDomainElts; ++i) {
        if (GIDisLocal[i]) {
          LocalColGIDs[numLocalCount++] = domainElts[i];
        }
      }
    }
    if (numLocalCount != numLocalColGIDs) {
      if (errStrm != NULL) {
        *errStrm << prefix << "numLocalCount = " << numLocalCount
                 << " != numLocalColGIDs = " << numLocalColGIDs
                 << ".  This should never happen.  "
          "Please report this bug to the Tpetra developers." << endl;
      }
      // Don't return yet, because not all processes may have
      // encountered this error state.  This function ends with an
      // all-reduce, so we have to make sure that everybody gets to
      // that point.
      errCode = -4;
    }
  }

  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
  // information we collected above to construct the Import.  In
  // particular, building an Import requires:
  //
  // 1. numSameIDs (length of initial contiguous sequence of GIDs
  //    on this process that are the same in both Maps; this
  //    equals the number of domain Map elements on this process)
  //
  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
  //    case, since there's no permutation going on; the column
  //    Map starts with the domain Map's GIDs, and immediately
  //    after them come the remote GIDs)
  //
  // 3. remoteGIDs (exactly those GIDs that we found out above
  //    were not in the domain Map) and remoteLIDs (which we could
  //    have gotten above by using the three-argument version of
  //    getRemoteIndexList() that computes local indices as well
  //    as process ranks, instead of the two-argument version that
  //    was used above)
  //
  // 4. remotePIDs (which we have from the getRemoteIndexList() call
  //    above) -- NOTE (mfh 14 Sep 2017) The fix for Trilinos GitHub
  //    Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
  //    addresses this.  remotePIDs is now an output argument of
  //    both this function and CrsGraph::makeColMap, and
  //    CrsGraph::makeImportExport now has an option to use that
  //    information, if available (that is, if makeColMap was
  //    actually called, which it would not be if the graph already
  //    had a column Map).
  //
  // 5. Apply the permutation from sorting remotePIDs to both
  //    remoteGIDs and remoteLIDs (by calling sort3 above instead of
  //    sort2), instead of just to remoteLIDs alone.
  //
  // 6. Everything after the sort3 call in Import::setupExport():
  //    a. Create the Distributor via createFromRecvs(), which
  //       computes exportGIDs and exportPIDs
  //    b. Compute exportLIDs from exportGIDs (by asking the
  //       source Map, in this case the domain Map, to convert
  //       global to local)
  //
  // Steps 1-5 come for free, since we must do that work anyway in
  // order to compute the column Map.  In particular, Step 3 is
  // even more expensive than Step 6a, since it involves both
  // creating and using a new Distributor object.
  const global_size_t INV =
    Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
  // be the same as the Map's min GID?  If the first column is empty
  // (contains no entries), then the column Map's min GID won't
  // necessarily be the same as the domain Map's index base.
  const GO indexBase = domMap->getIndexBase ();
  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
  return errCode;
}
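
// Illustrative (hypothetical) example of makeColMapImpl's inputs on one
// process: if the domain Map owns GIDs {0, 1, 2} and the column indices
// encountered are {1, 2, 7, 5}, then numLocalColGIDs = 2,
// numRemoteColGIDs = 2, GIDisLocal = {false, true, true},
// RemoteGIDSet = {5, 7}, and RemoteGIDUnorderedVector = {7, 5}.
// The resulting column Map lists {1, 2} first (in domain Map order),
// followed by the remote GIDs sorted by owning process rank.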

template <class LO, class GO, class NT>
int
makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& colMap,
            Teuchos::Array<int>& remotePIDs,
            const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domMap,
            const RowGraph<LO, GO, NT>& graph,
            const bool sortEachProcsGids,
            std::ostream* errStrm)
{
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::rcp;
  using std::endl;
  typedef ::Tpetra::Map<LO, GO, NT> map_type;
  const char prefix[] = "Tpetra::Details::makeColMap: ";
  int errCode = 0;

  // If the input domain Map or its communicator is null on the
  // calling process, then the calling process does not participate in
  // the returned column Map.  Thus, we can set the returned column
  // Map to null on those processes, and return immediately.  This is
  // not an error condition, as long as when domMap and its
  // communicator are NOT null, the graph's other Maps and
  // communicator are also not null.
  if (domMap.is_null () || domMap->getComm ().is_null ()) {
    colMap = Teuchos::null;
    return errCode;
  }

  Array<GO> myColumns;
  if (graph.isLocallyIndexed ()) {
    colMap = graph.getColMap ();
    // If the graph is locally indexed, it had better have a column Map.
    // The extra check for ! graph.hasColMap() is conservative.
    if (colMap.is_null () || ! graph.hasColMap ()) {
      errCode = -1;
      if (errStrm != NULL) {
        *errStrm << prefix << "The graph is locally indexed on the calling "
          "process, but has no column Map (either getColMap() returns null, "
          "or hasColMap() returns false)." << endl;
      }
      // Under this error condition, this process will not fill
      // myColumns.  The resulting new column Map will be incorrect,
      // but at least we won't hang, and this process will report the
      // error.
    }
    else {
      // The graph already has a column Map, and is locally indexed on
      // the calling process.  However, it may be globally indexed (or
      // neither locally nor globally indexed) on other processes.
      // Assume that we want to recreate the column Map.
      if (colMap->isContiguous ()) {
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO> (colMap->getLocalNumElements ());
        myColumns.resize (numCurGids);
        const GO myFirstGblInd = colMap->getMinGlobalIndex ();
        for (LO k = 0; k < numCurGids; ++k) {
          myColumns[k] = myFirstGblInd + static_cast<GO> (k);
        }
      }
      else { // the column Map is NOT contiguous
        ArrayView<const GO> curGids = graph.getColMap ()->getLocalElementList ();
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO> (curGids.size ());
        myColumns.resize (numCurGids);
        for (LO k = 0; k < numCurGids; ++k) {
          myColumns[k] = curGids[k];
        }
      } // whether the graph's current column Map is contiguous
    } // does the graph currently have a column Map?
  }
  else if (graph.isGloballyIndexed ()) {
    // Go through all the rows, finding the populated column indices.
    //
    // Our final list of indices for the column Map constructor will
    // have the following properties (all of which are with respect to
    // the calling process):
    //
    // 1. Indices in the domain Map go first.
    // 2. Indices not in the domain Map follow, ordered first
    //    contiguously by their owning process rank (in the domain
    //    Map), then in increasing order within that.
    // 3. No duplicate indices.
    //
    // This imitates the ordering used by Aztec(OO) and Epetra.
    // Storing indices owned by the same process (in the domain Map)
    // contiguously permits the use of contiguous send and receive
    // buffers.
    //
    // We begin by partitioning the column indices into "local" GIDs
    // (owned by the domain Map) and "remote" GIDs (not owned by the
    // domain Map).  We use the same order for local GIDs as the
    // domain Map, so we track them in place in their array.  We use
    // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
    // that we don't have to merge duplicates later.
    const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid ();
    size_t numLocalColGIDs = 0;
    size_t numRemoteColGIDs = 0;

    // GIDisLocal[lid] is true if and only if the domain Map GID with
    // local index lid occurs among the graph's column indices on the
    // calling process.
    std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
    std::set<GO> RemoteGIDSet;
    // This preserves the not-sorted Epetra order of GIDs.
    // We only use this if sortEachProcsGids is false.
    std::vector<GO> RemoteGIDUnorderedVector;

    if (! graph.getRowMap ().is_null ()) {
      const Tpetra::Map<LO, GO, NT>& rowMap = * (graph.getRowMap ());
      const LO lclNumRows = rowMap.getLocalNumElements ();
      for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
        const GO gblRow = rowMap.getGlobalElement (lclRow);
        typename RowGraph<LO,GO,NT>::global_inds_host_view_type rowGids;
        graph.getGlobalRowView (gblRow, rowGids);

        const LO numEnt = static_cast<LO> (rowGids.size ());
        if (numEnt != 0) {
          for (LO k = 0; k < numEnt; ++k) {
            const GO gid = rowGids[k];
            const LO lid = domMap->getLocalElement (gid);
            if (lid != LINV) {
              const bool alreadyFound = GIDisLocal[lid];
              if (! alreadyFound) {
                GIDisLocal[lid] = true;
                ++numLocalColGIDs;
              }
            }
            else {
              const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
              if (notAlreadyFound) { // gid did not exist in the set before
                if (! sortEachProcsGids) {
                  // The user doesn't want to sort remote GIDs (for each
                  // remote process); they want us to keep remote GIDs
                  // in their original order.  We do this by stuffing
                  // each remote GID into an array as we encounter it
                  // for the first time.  The std::set helpfully tracks
                  // first encounters.
                  RemoteGIDUnorderedVector.push_back (gid);
                }
                ++numRemoteColGIDs;
              }
            }
          } // for each entry k in row r
        } // if row r contains > 0 entries
      } // for each locally owned row r
    } // if the graph has a nonnull row Map

    return makeColMapImpl<LO, GO, NT>(
      colMap, remotePIDs,
      domMap,
      numLocalColGIDs, numRemoteColGIDs,
      RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
      sortEachProcsGids, errStrm);

  } // if the graph is globally indexed
  else {
    // If we reach this point, the graph is neither locally nor
    // globally indexed.  Thus, the graph is empty on this process
    // (per the usual legacy Petra convention), so myColumns will be
    // left empty.
    ; // do nothing
  }

  const global_size_t INV =
    Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
  // be the same as the Map's min GID?  If the first column is empty
  // (contains no entries), then the column Map's min GID won't
  // necessarily be the same as the domain Map's index base.
  const GO indexBase = domMap->getIndexBase ();
  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
  return errCode;
}
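
// A minimal usage sketch of the overload above (hypothetical call site;
// "G" and "domainMap" are assumed to be a globally indexed graph and its
// domain Map, neither of which appears in this file):
//
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT> > colMap;
//   Teuchos::Array<int> remotePIDs;
//   const int errCode =
//     Tpetra::Details::makeColMap (colMap, remotePIDs, domainMap,
//                                  G, true, &std::cerr);
//   // errCode == 0 on success; nonzero values match the error codes
//   // assigned above (e.g., -1 for a locally indexed graph that lacks
//   // a column Map).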

template<typename GOView, typename bitset_t>
struct GatherPresentEntries
{
  using GO = typename GOView::non_const_value_type;

  GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
    : minGID(minGID_), gids(gids_), present(present_)
  {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i) const
  {
    present.set(gids(i) - minGID);
  }

  GO minGID;
  GOView gids;
  bitset_t present;
};
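
// Illustrative (hypothetical) example: with minGID = 10 and
// gids = {12, 10, 15, 12}, running this functor over all entries sets
// bits {2, 0, 5} of the bitset.  A duplicate GID just sets the same bit
// again, so the bitset deduplicates the input for free.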

template<typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
struct ListGIDs
{
  using mem_space = typename device_t::memory_space;
  using GOView = Kokkos::View<GO*, mem_space>;
  using SingleView = Kokkos::View<GO, mem_space>;

  ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
    : minGID(minGID_), gidList(gidList_), numElems(numElems_), present(present_), localDomainMap(localDomainMap_)
  {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const
  {
    bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
    if(present.test(i) && doingRemotes == isRemote)
    {
      if(finalPass)
      {
        // lcount is the index where this GID should be inserted in gidList.
        gidList(lcount) = minGID + i;
      }
      lcount++;
    }
    if((i == static_cast<GO>(present.size() - 1)) && finalPass)
    {
      // Set the number of inserted indices in a single-element view.
      numElems() = lcount;
    }
  }

  GO minGID;
  GOView gidList;
  SingleView numElems;
  const_bitset_t present;
  const LocalMapType localDomainMap;
};
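
// Illustrative (hypothetical) example of the scan: with minGID = 10 and
// present bits {1, 0, 1, 1} (GIDs 10, 12, 13 present), and a domain Map
// that owns only GID 12, instantiating ListGIDs with doingRemotes = false
// packs {12} into gidList and sets numElems() = 1, while
// doingRemotes = true packs {10, 13} and sets numElems() = 2.  Because
// the scan assigns output slots in increasing index order, each packed
// list comes out sorted by GID.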

template<typename GO, typename mem_space>
struct MinMaxReduceFunctor
{
  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
  using GOView = Kokkos::View<GO*, mem_space>;

  MinMaxReduceFunctor(const GOView& gids_)
    : gids(gids_)
  {}

  KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const
  {
    GO gid = gids(i);
    if(gid < lminmax.min_val)
      lminmax.min_val = gid;
    if(gid > lminmax.max_val)
      lminmax.max_val = gid;
  }

  const GOView gids;
};
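
// Illustrative (hypothetical) example: reducing over gids = {12, 10, 15}
// with Kokkos::MinMax<GO> yields min_val = 10 and max_val = 15, which
// become the bounds of the dense bitset allocated below.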

template <class LO, class GO, class NT>
int
makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
            const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
            Kokkos::View<GO*, typename NT::memory_space> gids,
            std::ostream* errStrm)
{
  using Teuchos::RCP;
  using Teuchos::Array;
  using Kokkos::RangePolicy;
  using device_t = typename NT::device_type;
  using exec_space = typename device_t::execution_space;
  using memory_space = typename device_t::memory_space;
  // Note BMK 5-2021: this is deliberately not just device_t.
  // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
  // use the default memory space for HIP (HIPSpace).  Using the default mem
  // space is fine for all other backends too.  This bitset type is only used
  // in this function so it won't cause type mismatches.
  using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
  using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
  using GOView = Kokkos::View<GO*, memory_space>;
  using SingleView = Kokkos::View<GO, memory_space>;
  using map_type = Tpetra::Map<LO, GO, NT>;
  using LocalMap = typename map_type::local_map_type;
  GO nentries = gids.extent(0);
  GO minGID = Teuchos::OrdinalTraits<GO>::max();
  GO maxGID = 0;
  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
  MinMaxValue minMaxGID = {minGID, maxGID};
  Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
      MinMaxReduceFunctor<GO, memory_space>(gids),
      Kokkos::MinMax<GO>(minMaxGID));
  minGID = minMaxGID.min_val; maxGID = minMaxGID.max_val;
  // Now we know the full range of input GIDs.  Determine the set of
  // GIDs in the column Map using a dense bitset, whose bits correspond
  // to the range [minGID, maxGID].
  bitset_t presentGIDs(maxGID - minGID + 1);
  Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries),
      GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
  const_bitset_t constPresentGIDs(presentGIDs);
  // Get the set of local and remote GIDs on device.
  SingleView numLocals("Num local GIDs");
  SingleView numRemotes("Num remote GIDs");
  GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
  GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
  LocalMap localDomMap = domMap->getLocalMap();
  // This lists the locally owned GIDs in localGIDView...
  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
      ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>
      (minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
  // ...and this lists the remote GIDs in remoteGIDView.
  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
      ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>
      (minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
  // Pull down the sizes.
  GO numLocalColGIDs = 0;
  GO numRemoteColGIDs = 0;
  // DEEP_COPY REVIEW - DEVICE-TO-VALUE
  Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
  // DEEP_COPY REVIEW - DEVICE-TO-VALUE
  Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
  // Pull down the local and remote GID lists.
  auto localsHost = Kokkos::create_mirror_view(localGIDView);
  auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
  Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
  // CAG: This fence was found to be required on Cuda with UVM=on.
  Kokkos::fence("Tpetra::makeColMap");
  // Finally, populate the STL structures that hold the index lists.
  std::set<GO> RemoteGIDSet;
  std::vector<GO> RemoteGIDUnorderedVector;
  std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
  for(GO i = 0; i < numLocalColGIDs; i++)
  {
    GO gid = localsHost(i);
    // We already know that gid is locally owned, so this index will
    // never be invalid().  makeColMapImpl uses this and the domain
    // Map to get the local GID list.
    GIDisLocal[domMap->getLocalElement(gid)] = true;
  }
  RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
  for(GO i = 0; i < numRemoteColGIDs; i++)
  {
    GO gid = remotesHost(i);
    RemoteGIDSet.insert(gid);
    RemoteGIDUnorderedVector.push_back(gid);
  }
  // remotePIDs will be discarded in this version of makeColMap.
  Array<int> remotePIDs;
  // Build the column Map from the gathered GID lists.
  return makeColMapImpl<LO, GO, NT>(
    colMap,
    remotePIDs,
    domMap,
    static_cast<size_t>(numLocalColGIDs),
    static_cast<size_t>(numRemoteColGIDs),
    RemoteGIDSet,
    RemoteGIDUnorderedVector,
    GIDisLocal,
    true, // always sort remotes
    errStrm);
}
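
// A minimal usage sketch of the device overload above (hypothetical;
// "gids" is assumed to be a device view of all column indices
// encountered, duplicates allowed, and "domainMap" the domain Map):
//
//   Kokkos::View<GO*, typename NT::memory_space> gids = /* ... */;
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   const int errCode =
//     Tpetra::Details::makeColMap (colMap, domainMap, gids, &std::cerr);
//
// Unlike the RowGraph overload, this version always sorts remote GIDs
// within each owning process and does not return remotePIDs.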

} // namespace Details
} // namespace Tpetra

//
// Explicit instantiation macros
//
// Must be expanded from within the Tpetra namespace!
//
#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO,GO,NT) \
  namespace Details { \
    template int \
    makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
                Teuchos::Array<int>&, \
                const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
                const RowGraph<LO, GO, NT>&, \
                const bool, \
                std::ostream*); \
    template int \
    makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
                const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
                Kokkos::View<GO*, typename NT::memory_space>, \
                std::ostream*); \
  }

#endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP