Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_makeColMap_def.hpp
Go to the documentation of this file.
1 #ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2 #define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3 
4 // @HEADER
5 // ***********************************************************************
6 //
7 // Tpetra: Templated Linear Algebra Services Package
8 // Copyright (2008) Sandia Corporation
9 //
10 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
11 // the U.S. Government retains certain rights in this software.
12 //
13 // Redistribution and use in source and binary forms, with or without
14 // modification, are permitted provided that the following conditions are
15 // met:
16 //
17 // 1. Redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer.
19 //
20 // 2. Redistributions in binary form must reproduce the above copyright
21 // notice, this list of conditions and the following disclaimer in the
22 // documentation and/or other materials provided with the distribution.
23 //
24 // 3. Neither the name of the Corporation nor the names of the
25 // contributors may be used to endorse or promote products derived from
26 // this software without specific prior written permission.
27 //
28 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
29 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
32 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 //
40 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
41 //
42 // ************************************************************************
43 // @HEADER
44 
55 
56 #include "Tpetra_RowGraph.hpp"
57 #include "Tpetra_Util.hpp"
58 #include "Teuchos_Array.hpp"
59 #include "Kokkos_Bitset.hpp"
60 #include <set>
61 #include <vector>
62 
63 namespace Tpetra {
64 namespace Details {
65 
66 template <class LO, class GO, class NT>
67 int
68 makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
69  Teuchos::Array<int>& remotePIDs,
70  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
71  size_t numLocalColGIDs,
72  size_t numRemoteColGIDs,
73  std::set<GO>& RemoteGIDSet,
74  std::vector<GO>& RemoteGIDUnorderedVector,
75  std::vector<bool>& GIDisLocal,
76  const bool sortEachProcsGids,
77  std::ostream* errStrm)
78 {
79  using Teuchos::Array;
80  using Teuchos::ArrayView;
81  using Teuchos::rcp;
82  using std::endl;
83  int errCode = 0;
84  const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
85  typedef ::Tpetra::Map<LO, GO, NT> map_type;
86  // Possible short-circuit for serial scenario:
87  //
88  // If all domain GIDs are present as column indices, then set
89  // ColMap=DomainMap. By construction, LocalGIDs is a subset of
90  // DomainGIDs.
91  //
92  // If we have
93  // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
94  // and
95  // * Number of local GIDs is number of domain GIDs
96  // then
97  // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
98  // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
99  // on the calling process.
100  //
101  // We will concern ourselves only with the special case of a
102  // serial DomainMap, obviating the need for communication.
103  //
104  // If
105  // * DomainMap has a serial communicator
106  // then we can set the column Map as the domain Map
107  // return. Benefit: this graph won't need an Import object
108  // later.
109  //
110  // Note, for a serial domain map, there can be no RemoteGIDs,
111  // because there are no remote processes. Likely explanations
112  // for this are:
113  // * user submitted erroneous column indices
114  // * user submitted erroneous domain Map
115  if (domMap->getComm ()->getSize () == 1) {
116  if (numRemoteColGIDs != 0) {
117  errCode = -2;
118  if (errStrm != NULL) {
119  *errStrm << prefix << "The domain Map only has one process, but "
120  << numRemoteColGIDs << " column "
121  << (numRemoteColGIDs != 1 ? "indices are" : "index is")
122  << " not in the domain Map. Either these indices are "
123  "invalid or the domain Map is invalid. Remember that nonsquare "
124  "matrices, or matrices where the row and range Maps differ, "
125  "require calling the version of fillComplete that takes the "
126  "domain and range Maps as input." << endl;
127  }
128  }
129  if (numLocalColGIDs == domMap->getLocalNumElements ()) {
130  colMap = domMap; // shallow copy
131  return errCode;
132  }
133  }
134 
135  // Populate myColumns with a list of all column GIDs. Put
136  // locally owned (in the domain Map) GIDs at the front: they
137  // correspond to "same" and "permuted" entries between the
138  // column Map and the domain Map. Put remote GIDs at the back.
139  Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
140  // get pointers into myColumns for each part
141  ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
142  ArrayView<GO> remoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
143 
144  // Copy the remote GIDs into myColumns
145  if (sortEachProcsGids) {
146  // The std::set puts GIDs in increasing order.
147  std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
148  remoteColGIDs.begin());
149  }
150  else {
151  // Respect the originally encountered order.
152  std::copy (RemoteGIDUnorderedVector.begin(),
153  RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
154  }
155 
156  // Make a list of process ranks corresponding to the remote GIDs.
157  // remotePIDs is an output argument of getRemoteIndexList below;
158  // its initial contents don't matter.
159  if (static_cast<size_t> (remotePIDs.size ()) != numRemoteColGIDs) {
160  remotePIDs.resize (numRemoteColGIDs);
161  }
162  // Look up the remote process' ranks in the domain Map.
163  {
164  const LookupStatus stat =
165  domMap->getRemoteIndexList (remoteColGIDs, remotePIDs ());
166 
167  // If any process returns IDNotPresent, then at least one of
168  // the remote indices was not present in the domain Map. This
169  // means that the Import object cannot be constructed, because
170  // of incongruity between the column Map and domain Map.
171  // This has two likely causes:
172  // - The user has made a mistake in the column indices
173  // - The user has made a mistake with respect to the domain Map
174  if (stat == IDNotPresent) {
175  if (errStrm != NULL) {
176  *errStrm << prefix << "Some column indices are not in the domain Map."
177  "Either these column indices are invalid or the domain Map is "
178  "invalid. Likely cause: For a nonsquare matrix, you must give the "
179  "domain and range Maps as input to fillComplete." << endl;
180  }
181  // Don't return yet, because not all processes may have
182  // encountered this error state. This function ends with an
183  // all-reduce, so we have to make sure that everybody gets to
184  // that point. The resulting Map may be wrong, but at least
185  // nothing should crash.
186  errCode = -3;
187  }
188  }
189 
190  // Sort incoming remote column indices by their owning process
191  // rank, so that all columns coming from a given remote process
192  // are contiguous. This means the Import's Distributor doesn't
193  // need to reorder data.
194  //
195  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
196  // it respects either of the possible orderings of GIDs (sorted,
197  // or original order) specified above.
198  sort2 (remotePIDs.begin (), remotePIDs.end (), remoteColGIDs.begin ());
199 
200  // Copy the local GIDs into myColumns. Two cases:
201  // 1. If the number of Local column GIDs is the same as the number
202  // of Local domain GIDs, we can simply read the domain GIDs
203  // into the front part of ColIndices (see logic above from the
204  // serial short circuit case)
205  // 2. We step through the GIDs of the DomainMap, checking to see
206  // if each domain GID is a column GID. We want to do this to
207  // maintain a consistent ordering of GIDs between the columns
208  // and the domain.
209 
210  const size_t numDomainElts = domMap->getLocalNumElements ();
211  if (numLocalColGIDs == numDomainElts) {
212  // If the number of locally owned GIDs are the same as the
213  // number of local domain Map elements, then the local domain
214  // Map elements are the same as the locally owned GIDs.
215  if (domMap->isContiguous ()) {
216  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
217  // the domain Map is contiguous, it's more efficient to avoid
218  // calling getLocalElementList(), since that permanently
219  // constructs and caches the GID list in the contiguous Map.
220  GO curColMapGid = domMap->getMinGlobalIndex ();
221  for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
222  LocalColGIDs[k] = curColMapGid;
223  }
224  }
225  else {
226  ArrayView<const GO> domainElts = domMap->getLocalElementList ();
227  std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
228  }
229  }
230  else {
231  // Count the number of locally owned GIDs, both to keep track of
232  // the current array index, and as a sanity check.
233  size_t numLocalCount = 0;
234  if (domMap->isContiguous ()) {
235  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
236  // the domain Map is contiguous, it's more efficient to avoid
237  // calling getLocalElementList(), since that permanently
238  // constructs and caches the GID list in the contiguous Map.
239  GO curColMapGid = domMap->getMinGlobalIndex ();
240  for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
241  if (GIDisLocal[i]) {
242  LocalColGIDs[numLocalCount++] = curColMapGid;
243  }
244  }
245  }
246  else {
247  ArrayView<const GO> domainElts = domMap->getLocalElementList ();
248  for (size_t i = 0; i < numDomainElts; ++i) {
249  if (GIDisLocal[i]) {
250  LocalColGIDs[numLocalCount++] = domainElts[i];
251  }
252  }
253  }
254  if (numLocalCount != numLocalColGIDs) {
255  if (errStrm != NULL) {
256  *errStrm << prefix << "numLocalCount = " << numLocalCount
257  << " != numLocalColGIDs = " << numLocalColGIDs
258  << ". This should never happen. "
259  "Please report this bug to the Tpetra developers." << endl;
260  }
261  // Don't return yet, because not all processes may have
262  // encountered this error state. This function ends with an
263  // all-reduce, so we have to make sure that everybody gets to
264  // that point.
265  errCode = -4;
266  }
267  }
268 
269  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
270  // information we collected above to construct the Import. In
271  // particular, building an Import requires:
272  //
273  // 1. numSameIDs (length of initial contiguous sequence of GIDs
274  // on this process that are the same in both Maps; this
275  // equals the number of domain Map elements on this process)
276  //
277  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
278  // case, since there's no permutation going on; the column
279  // Map starts with the domain Map's GIDs, and immediately
280  // after them come the remote GIDs)
281  //
282  // 3. remoteGIDs (exactly those GIDs that we found out above
283  // were not in the domain Map) and remoteLIDs (which we could
284  // have gotten above by using the three-argument version of
285  // getRemoteIndexList() that computes local indices as well
286  // as process ranks, instead of the two-argument version that
287  // was used above)
288  //
289  // 4. remotePIDs (which we have from the getRemoteIndexList() call
290  // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
291  // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
292  // addresses this. remotePIDs is now an output argument of
293  // both this function and CrsGraph::makeColMap, and
294  // CrsGraph::makeImportExport now has an option to use that
295  // information, if available (that is, if makeColMap was
296  // actually called, which it would not be if the graph already
297  // had a column Map).
298  //
299  // 5. Apply the permutation from sorting remotePIDs to both
300  // remoteGIDs and remoteLIDs (by calling sort3 above instead of
301  // sort2), instead of just to remoteLIDs alone.
302  //
303  // 6. Everything after the sort3 call in Import::setupExport():
304  // a. Create the Distributor via createFromRecvs(), which
305  // computes exportGIDs and exportPIDs
306  // b. Compute exportLIDs from exportGIDs (by asking the
307  // source Map, in this case the domain Map, to convert
308  // global to local)
309  //
310  // Steps 1-5 come for free, since we must do that work anyway in
311  // order to compute the column Map. In particular, Step 3 is
312  // even more expensive than Step 6a, since it involves both
313  // creating and using a new Distributor object.
314  const global_size_t INV =
315  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
316  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
317  // be the same as the Map's min GID? If the first column is empty
318  // (contains no entries), then the column Map's min GID won't
319  // necessarily be the same as the domain Map's index base.
320  const GO indexBase = domMap->getIndexBase ();
321  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
322  return errCode;
323 }
324 
325 template <class LO, class GO, class NT>
326 int
327 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& colMap,
328  Teuchos::Array<int>& remotePIDs,
329  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domMap,
330  const RowGraph<LO, GO, NT>& graph,
331  const bool sortEachProcsGids,
332  std::ostream* errStrm)
333 {
334  using Teuchos::Array;
335  using Teuchos::ArrayView;
336  using Teuchos::rcp;
337  using std::endl;
338  typedef ::Tpetra::Map<LO, GO, NT> map_type;
339  const char prefix[] = "Tpetra::Details::makeColMap: ";
340  int errCode = 0;
341 
342  // If the input domain Map or its communicator is null on the
343  // calling process, then the calling process does not participate in
344  // the returned column Map. Thus, we can set the returned column
345  // Map to null on those processes, and return immediately. This is
346  // not an error condition, as long as when domMap and its
347  // communicator are NOT null, the graph's other Maps and
348  // communicator are not also null.
349  if (domMap.is_null () || domMap->getComm ().is_null ()) {
350  colMap = Teuchos::null;
351  return errCode;
352  }
353 
354  Array<GO> myColumns;
355  if (graph.isLocallyIndexed ()) {
356  colMap = graph.getColMap ();
357  // If the graph is locally indexed, it had better have a column Map.
358  // The extra check for ! graph.hasColMap() is conservative.
359  if (colMap.is_null () || ! graph.hasColMap ()) {
360  errCode = -1;
361  if (errStrm != NULL) {
362  *errStrm << prefix << "The graph is locally indexed on the calling "
363  "process, but has no column Map (either getColMap() returns null, "
364  "or hasColMap() returns false)." << endl;
365  }
366  // Under this error condition, this process will not fill
367  // myColumns. The resulting new column Map will be incorrect,
368  // but at least we won't hang, and this process will report the
369  // error.
370  }
371  else {
372  // The graph already has a column Map, and is locally indexed on
373  // the calling process. However, it may be globally indexed (or
374  // neither locally nor globally indexed) on other processes.
375  // Assume that we want to recreate the column Map.
376  if (colMap->isContiguous ()) {
377  // The number of indices on each process must fit in LO.
378  const LO numCurGids = static_cast<LO> (colMap->getLocalNumElements ());
379  myColumns.resize (numCurGids);
380  const GO myFirstGblInd = colMap->getMinGlobalIndex ();
381  for (LO k = 0; k < numCurGids; ++k) {
382  myColumns[k] = myFirstGblInd + static_cast<GO> (k);
383  }
384  }
385  else { // the column Map is NOT contiguous
386  ArrayView<const GO> curGids = graph.getColMap ()->getLocalElementList ();
387  // The number of indices on each process must fit in LO.
388  const LO numCurGids = static_cast<LO> (curGids.size ());
389  myColumns.resize (numCurGids);
390  for (LO k = 0; k < numCurGids; ++k) {
391  myColumns[k] = curGids[k];
392  }
393  } // whether the graph's current column Map is contiguous
394  } // does the graph currently have a column Map?
395  }
396  else if (graph.isGloballyIndexed ()) {
397  // Go through all the rows, finding the populated column indices.
398  //
399  // Our final list of indices for the column Map constructor will
400  // have the following properties (all of which are with respect to
401  // the calling process):
402  //
403  // 1. Indices in the domain Map go first.
404  // 2. Indices not in the domain Map follow, ordered first
405  // contiguously by their owning process rank (in the domain
406  // Map), then in increasing order within that.
407  // 3. No duplicate indices.
408  //
409  // This imitates the ordering used by Aztec(OO) and Epetra.
410  // Storing indices owned by the same process (in the domain Map)
411  // contiguously permits the use of contiguous send and receive
412  // buffers.
413  //
414  // We begin by partitioning the column indices into "local" GIDs
415  // (owned by the domain Map) and "remote" GIDs (not owned by the
416  // domain Map). We use the same order for local GIDs as the
417  // domain Map, so we track them in place in their array. We use
418  // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
419  // that we don't have to merge duplicates later.
420  const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid ();
421  size_t numLocalColGIDs = 0;
422  size_t numRemoteColGIDs = 0;
423 
424  // GIDisLocal[lid] is false if and only if local index lid in the
425  // domain Map is remote (not local).
426  std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
427  std::set<GO> RemoteGIDSet;
428  // This preserves the not-sorted Epetra order of GIDs.
429  // We only use this if sortEachProcsGids is false.
430  std::vector<GO> RemoteGIDUnorderedVector;
431 
432  if (! graph.getRowMap ().is_null ()) {
433  const Tpetra::Map<LO, GO, NT>& rowMap = * (graph.getRowMap ());
434  const LO lclNumRows = rowMap.getLocalNumElements ();
435  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
436  const GO gblRow = rowMap.getGlobalElement (lclRow);
437  typename RowGraph<LO,GO,NT>::global_inds_host_view_type rowGids;
438  graph.getGlobalRowView (gblRow, rowGids);
439 
440  const LO numEnt = static_cast<LO> (rowGids.size ());
441  if (numEnt != 0) {
442  for (LO k = 0; k < numEnt; ++k) {
443  const GO gid = rowGids[k];
444  const LO lid = domMap->getLocalElement (gid);
445  if (lid != LINV) {
446  const bool alreadyFound = GIDisLocal[lid];
447  if (! alreadyFound) {
448  GIDisLocal[lid] = true;
449  ++numLocalColGIDs;
450  }
451  }
452  else {
453  const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
454  if (notAlreadyFound) { // gid did not exist in the set before
455  if (! sortEachProcsGids) {
456  // The user doesn't want to sort remote GIDs (for each
457  // remote process); they want us to keep remote GIDs
458  // in their original order. We do this by stuffing
459  // each remote GID into an array as we encounter it
460  // for the first time. The std::set helpfully tracks
461  // first encounters.
462  RemoteGIDUnorderedVector.push_back (gid);
463  }
464  ++numRemoteColGIDs;
465  }
466  }
467  } // for each entry k in row r
468  } // if row r contains > 0 entries
469  } // for each locally owned row r
470  } // if the graph has a nonnull row Map
471 
472  return makeColMapImpl<LO, GO, NT>(
473  colMap, remotePIDs,
474  domMap,
475  numLocalColGIDs, numRemoteColGIDs,
476  RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
477  sortEachProcsGids, errStrm);
478 
479  } // if the graph is globally indexed
480  else {
481  // If we reach this point, the graph is neither locally nor
482  // globally indexed. Thus, the graph is empty on this process
483  // (per the usual legacy Petra convention), so myColumns will be
484  // left empty.
485  ; // do nothing
486  }
487 
488  const global_size_t INV =
489  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
490  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
491  // be the same as the Map's min GID? If the first column is empty
492  // (contains no entries), then the column Map's min GID won't
493  // necessarily be the same as the domain Map's index base.
494  const GO indexBase = domMap->getIndexBase ();
495  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
496  return errCode;
497 }
498 
499 template<typename GOView, typename bitset_t>
500 struct GatherPresentEntries
501 {
502  using GO = typename GOView::non_const_value_type;
503 
504  GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
505  : minGID(minGID_), gids(gids_), present(present_)
506  {}
507 
508  KOKKOS_INLINE_FUNCTION void operator()(const GO i) const
509  {
510  present.set(gids(i) - minGID);
511  }
512 
513  GO minGID;
514  GOView gids;
515  bitset_t present;
516 };
517 
518 template<typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
519 struct ListGIDs
520 {
521  using mem_space = typename device_t::memory_space;
522  using GOView = Kokkos::View<GO*, mem_space>;
523  using SingleView = Kokkos::View<GO, mem_space>;
524 
525  ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
526  : minGID(minGID_), gidList(gidList_), numElems(numElems_), present(present_), localDomainMap(localDomainMap_)
527  {}
528 
529  KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const
530  {
531  bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
532  if(present.test(i) && doingRemotes == isRemote)
533  {
534  if(finalPass)
535  {
536  //lcount is the index where this GID should be inserted in gidList.
537  gidList(lcount) = minGID + i;
538  }
539  lcount++;
540  }
541  if((i == static_cast<GO>(present.size() - 1)) && finalPass)
542  {
543  //Set the number of inserted indices in a single-element view
544  numElems() = lcount;
545  }
546  }
547 
548  GO minGID;
549  GOView gidList;
550  SingleView numElems;
551  const_bitset_t present;
552  const LocalMapType localDomainMap;
553 };
554 
555 template<typename GO, typename mem_space>
556 struct MinMaxReduceFunctor
557 {
558  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
559  using GOView = Kokkos::View<GO*, mem_space>;
560 
561  MinMaxReduceFunctor(const GOView& gids_)
562  : gids(gids_)
563  {}
564 
565  KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const
566  {
567  GO gid = gids(i);
568  if(gid < lminmax.min_val)
569  lminmax.min_val = gid;
570  if(gid > lminmax.max_val)
571  lminmax.max_val = gid;
572  };
573 
574  const GOView gids;
575 };
576 
577 template <class LO, class GO, class NT>
578 int
579 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
580  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
581  Kokkos::View<GO*, typename NT::memory_space> gids,
582  std::ostream* errStrm)
583 {
584  using Teuchos::RCP;
585  using Teuchos::Array;
586  using Kokkos::RangePolicy;
587  using device_t = typename NT::device_type;
588  using exec_space = typename device_t::execution_space;
589  using memory_space = typename device_t::memory_space;
590  // Note BMK 5-2021: this is deliberately not just device_t.
591  // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
592  // use the default memory space for HIP (HIPSpace). Using the default mem
593  // space is fine for all other backends too. This bitset type is only used
594  // in this function so it won't cause type mismatches.
595  using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
596  using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
597  using GOView = Kokkos::View<GO*, memory_space>;
598  using SingleView = Kokkos::View<GO, memory_space>;
599  using map_type = Tpetra::Map<LO, GO, NT>;
600  using LocalMap = typename map_type::local_map_type;
601  GO nentries = gids.extent(0);
602  GO minGID = Teuchos::OrdinalTraits<GO>::max();
603  GO maxGID = 0;
604  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
605  MinMaxValue minMaxGID = {minGID, maxGID};
606  Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
607  MinMaxReduceFunctor<GO, memory_space>(gids),
608  Kokkos::MinMax<GO>(minMaxGID));
609  minGID = minMaxGID.min_val; maxGID = minMaxGID.max_val;
610  //Now, know the full range of input GIDs.
611  //Determine the set of GIDs in the column map using a dense bitset, which corresponds to the range [minGID, maxGID]
612  bitset_t presentGIDs(maxGID - minGID + 1);
613  Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries), GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
614  const_bitset_t constPresentGIDs(presentGIDs);
615  //Get the set of local and remote GIDs on device
616  SingleView numLocals("Num local GIDs");
617  SingleView numRemotes("Num remote GIDs");
618  GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
619  GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
620  LocalMap localDomMap = domMap->getLocalMap();
621  //This lists the locally owned GIDs in localGIDView
622  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
623  ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>
624  (minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
625  //And this lists the remote GIDs in remoteGIDView
626  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
627  ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>
628  (minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
629  //Pull down the sizes
630  GO numLocalColGIDs = 0;
631  GO numRemoteColGIDs = 0;
632  // DEEP_COPY REVIEW - DEVICE-TO-VALUE
633  Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
634  // DEEP_COPY REVIEW - DEVICE-TO-numLocalColGIDs
635  Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
636  //Pull down the remote lists
637  auto localsHost = Kokkos::create_mirror_view(localGIDView);
638  auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
639  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
640  Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
641  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
642  Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
643  // CAG: This fence was found to be required on Cuda with UVM=on.
644  Kokkos::fence("Tpetra::makeColMap");
645  //Finally, populate the STL structures which hold the index lists
646  std::set<GO> RemoteGIDSet;
647  std::vector<GO> RemoteGIDUnorderedVector;
648  std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
649  for(GO i = 0; i < numLocalColGIDs; i++)
650  {
651  GO gid = localsHost(i);
652  //Already know that gid is locally owned, so this index will never be invalid().
653  //makeColMapImpl uses this and the domain map to get the the local GID list.
654  GIDisLocal[domMap->getLocalElement(gid)] = true;
655  }
656  RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
657  for(GO i = 0; i < numRemoteColGIDs; i++)
658  {
659  GO gid = remotesHost(i);
660  RemoteGIDSet.insert(gid);
661  RemoteGIDUnorderedVector.push_back(gid);
662  }
663  //remotePIDs will be discarded in this version of makeColMap
664  Array<int> remotePIDs;
665  //Find the min and max GID
666  return makeColMapImpl<LO, GO, NT>(
667  colMap,
668  remotePIDs,
669  domMap,
670  static_cast<size_t>(numLocalColGIDs),
671  static_cast<size_t>(numRemoteColGIDs),
672  RemoteGIDSet,
673  RemoteGIDUnorderedVector,
674  GIDisLocal,
675  true, //always sort remotes
676  errStrm);
677 }
678 
679 } // namespace Details
680 } // namespace Tpetra
681 
682 //
683 // Explicit instantiation macros
684 //
685 // Must be expanded from within the Tpetra namespace!
686 //
687 #define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO,GO,NT) \
688  namespace Details { \
689  template int \
690  makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
691  Teuchos::Array<int>&, \
692  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
693  const RowGraph<LO, GO, NT>&, \
694  const bool, \
695  std::ostream*); \
696  template int \
697  makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
698  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
699  Kokkos::View<GO*, typename NT::memory_space>, \
700  std::ostream*); \
701  }
702 
703 #endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
virtual bool hasColMap() const =0
Whether the graph has a well-defined column Map.
An abstract interface for graphs accessed by rows.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes this graph&#39;s distribution of columns over processes.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes this graph&#39;s distribution of rows over processes.
virtual bool isGloballyIndexed() const =0
If graph indices are in the global range, this function returns true. Otherwise, this function return...
virtual void getGlobalRowView(const GlobalOrdinal gblRow, global_inds_host_view_type &gblColInds) const =0
Get a const, non-persisting view of the given global row&#39;s global column indices, as a Teuchos::Array...
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A parallel distribution of indices over processes.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph&#39;s column Map.
Stand-alone utility functions and macros.
virtual bool isLocallyIndexed() const =0
If graph indices are in the local range, this function returns true. Otherwise, this function returns...