Tpetra_Import_Util2.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_IMPORT_UTIL2_HPP
11 #define TPETRA_IMPORT_UTIL2_HPP
12 
17 
18 #include "Tpetra_ConfigDefs.hpp"
19 #include "Tpetra_Import.hpp"
20 #include "Tpetra_HashTable.hpp"
21 #include "Tpetra_Map.hpp"
22 #include "Tpetra_Util.hpp"
23 #include "Tpetra_Distributor.hpp"
26 #include "Tpetra_Vector.hpp"
27 #include "Kokkos_DualView.hpp"
28 #include "KokkosSparse_SortCrs.hpp"
29 #include <Teuchos_Array.hpp>
31 #include <Kokkos_UnorderedMap.hpp>
32 #include <unordered_map>
33 #include <utility>
34 #include <set>
35 
37 
38 #include <Kokkos_Core.hpp>
39 #include <Kokkos_Sort.hpp>
40 
41 namespace Tpetra {
42 namespace Import_Util {
43 
46 template <typename Scalar, typename Ordinal>
47 void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
48  const Teuchos::ArrayView<Ordinal>& CRS_colind,
49  const Teuchos::ArrayView<Scalar>& CRS_vals);
50 
51 template <typename Ordinal>
52 void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
53  const Teuchos::ArrayView<Ordinal>& CRS_colind);
54 
55 template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
56 void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
57  const colind_array_type& CRS_colind,
58  const vals_array_type& CRS_vals);
59 
60 template <typename rowptr_array_type, typename colind_array_type>
61 void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
62  const colind_array_type& CRS_colind);
63 
68 template <typename Scalar, typename Ordinal>
69 void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
70  const Teuchos::ArrayView<Ordinal>& CRS_colind,
71  const Teuchos::ArrayView<Scalar>& CRS_vals);
72 
73 template <typename Ordinal>
74 void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
75  const Teuchos::ArrayView<Ordinal>& CRS_colind);
76 
77 template <class rowptr_view_type, class colind_view_type, class vals_view_type>
78 void sortAndMergeCrsEntries(const rowptr_view_type& CRS_rowptr,
79  const colind_view_type& CRS_colind,
80  const vals_view_type& CRS_vals);
81 
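// Example usage of the sorting utilities declared above (an illustrative
// sketch; the small CRS arrays below are made up for this example):
//
// \code
//   // A 2 x 3 CRS matrix with unsorted, duplicated column indices.
//   std::vector<size_t> rowptr = {0, 3, 5};
//   std::vector<int>    colind = {2, 0, 2, 1, 0};
//   std::vector<double> vals   = {1.0, 2.0, 3.0, 4.0, 5.0};
//
//   Teuchos::ArrayView<size_t> rowptrView(rowptr.data(), rowptr.size());
//   Teuchos::ArrayView<int>    colindView(colind.data(), colind.size());
//   Teuchos::ArrayView<double> valsView(vals.data(), vals.size());
//
//   // Sort each row's column indices (values are permuted to match):
//   Tpetra::Import_Util::sortCrsEntries(rowptrView, colindView, valsView);
//
//   // ... or sort AND merge duplicate column indices (their values are
//   // summed, and rowptr is compressed to the merged structure):
//   Tpetra::Import_Util::sortAndMergeCrsEntries(rowptrView, colindView, valsView);
// \endcode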
97 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
98 void lowCommunicationMakeColMapAndReindex(
99  const Teuchos::ArrayView<const size_t>& rowptr,
100  const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
101  const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
102  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
103  const Teuchos::ArrayView<const int>& owningPIDs,
104  Teuchos::Array<int>& remotePIDs,
105  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
106 
111 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
112 void lowCommunicationMakeColMapAndReindex(
113  const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
114  const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
115  const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
116  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
117  const Teuchos::ArrayView<const int>& owningPIDs,
118  Teuchos::Array<int>& remotePIDs,
119  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
120 
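// Example call pattern for the Teuchos::ArrayView overload above (an
// illustrative sketch; rowptr, colind_LID, colind_GID, owningPIDs, and the
// domainMap RCP are assumed to have been set up by the caller):
//
// \code
//   using LO = typename Tpetra::Map<>::local_ordinal_type;
//   using GO = typename Tpetra::Map<>::global_ordinal_type;
//   using NT = typename Tpetra::Map<>::node_type;
//
//   Teuchos::Array<int> remotePIDs;
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO, GO, NT>(
//       rowptr, colind_LID, colind_GID, domainMap, owningPIDs,
//       remotePIDs, colMap);
//
//   // On return: colMap holds the new column Map, colind_LID holds the
//   // column indices reindexed into colMap's local ordering, and
//   // remotePIDs lists the owning process of each remote column GID.
// \endcode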
134 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
135 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
136  bool useReverseModeForOwnership,
137  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
138  bool useReverseModeForMigration,
139  Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs);
140 
141 } // namespace Import_Util
142 } // namespace Tpetra
143 
144 //
145 // Implementations
146 //
147 
148 namespace Tpetra {
149 namespace Import_Util {
150 
151 template <typename PID, typename GlobalOrdinal>
152 bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
153  const std::pair<PID, GlobalOrdinal>& b) {
154  if (a.first != b.first)
155  return (a.first < b.first);
156  return (a.second < b.second);
157 }
158 
159 template <typename PID,
160  typename GlobalOrdinal,
161  typename LocalOrdinal>
162 bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
163  const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
164  if (a.first != b.first)
165  return a.first < b.first;
166  else
167  return (a.second.first < b.second.first);
168 }
169 
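// Example (an illustrative sketch): these comparators are meant to be passed
// to std::sort to order (PID, GID) pairs by owning process first, then by
// global index, e.g.:
//
// \code
//   std::vector<std::pair<int, long long>> pairs = {{2, 7}, {0, 9}, {2, 3}};
//   std::sort(pairs.begin(), pairs.end(),
//             Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
//   // pairs is now {{0, 9}, {2, 3}, {2, 7}}.
// \endcode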
170 template <typename Scalar,
171  typename LocalOrdinal,
172  typename GlobalOrdinal,
173  typename Node>
174 void reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
175  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
176  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
177  const Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& RowTransfer,
178  Teuchos::RCP<const Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> MyImporter,
179  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MyDomainMap,
180  Teuchos::ArrayRCP<int>& type3PIDs,
181  Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
182  Teuchos::RCP<const Teuchos::Comm<int>>& rcomm) {
183 #ifdef HAVE_TPETRACORE_MPI
184  using Teuchos::TimeMonitor;
185  using ::Tpetra::Details::Behavior;
186  typedef LocalOrdinal LO;
187  typedef GlobalOrdinal GO;
188  typedef std::pair<GO, GO> pidgidpair_t;
189  using Teuchos::RCP;
190  const std::string prefix{" Import_Util2::ReverseND:: "};
191  const std::string label("IU2::Neighbor");
192 
193  // There can be no neighbor discovery if you don't have an importer
194  if (MyImporter.is_null()) return;
195 
196  std::ostringstream errstr;
197  bool error = false;
198  auto const comm = MyDomainMap->getComm();
199 
200  MPI_Comm rawComm = getRawMpiComm(*comm);
201  const int MyPID = rcomm->getRank();
202 
203  // Things related to messages I am sending in forward mode (RowTransfer)
204  // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
205  auto ExportPIDs = RowTransfer.getExportPIDs();
206  auto ExportLIDs = RowTransfer.getExportLIDs();
207  auto NumExportLIDs = RowTransfer.getNumExportIDs();
208 
209  Distributor& Distor = MyImporter->getDistributor();
210  const size_t NumRecvs = Distor.getNumReceives();
211  const size_t NumSends = Distor.getNumSends();
212  auto RemoteLIDs = MyImporter->getRemoteLIDs();
213  auto const ProcsFrom = Distor.getProcsFrom();
214  auto const ProcsTo = Distor.getProcsTo();
215 
216  auto LengthsFrom = Distor.getLengthsFrom();
217  auto MyColMap = SourceMatrix.getColMap();
218  const size_t numCols = MyColMap->getLocalNumElements();
219  RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();
220 
221  // Get the owning PIDs in a special way, so that
222  // ProcsFrom[RemotePIDOrder[RemoteLIDs[j]]] is the process that owns column RemoteLIDs[j].
223  Teuchos::Array<int> RemotePIDOrder(numCols, -1);
224 
225  // For each remote LID, record the index into ProcsFrom of the process that owns it.
226  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
227  for (size_t k = 0; k < LengthsFrom[i]; ++k) {
228  const int pid = ProcsFrom[i];
229  if (pid != MyPID) {
230  RemotePIDOrder[RemoteLIDs[j]] = i;
231  }
232  j++;
233  }
234  }
235 
236  // Step One: Start tacking the (PID, GID) pairs onto the std::sets
237  //
238  // For each index in ProcsFrom, we will insert into a set of (PID,
239  // GID) pairs, in order to build a list of such pairs for each of
240  // those processes. Since this is building a reverse, we will send
241  // to these processes.
242  Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
243  // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
244  Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);
245 
246  {
247 #ifdef HAVE_TPETRA_MMM_TIMINGS
248  TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
249 #endif
250 
251  // 25 Jul 2018: CBL
252  // TODO: consider std::unordered_set (a hash table),
253  // with an adequate pre-reservation ("bucket count").
254  // An unordered_set needs a custom hasher for the (pid, gid) pair.
255  // However, when pidsets is copied to RSB, it will be in key
256  // order, _not_ in (pid, gid) order (unlike std::set).
257  // Implement this with a reserve, and time BOTH building pidsets
258  // _and_ the sort after the receive. Even if unordered_set saves
259  // time, if it makes the sort take longer, it's not a win.
260 
261  Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
262  {
263 #ifdef HAVE_TPETRA_MMM_TIMINGS
264  TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
265 #endif
266  for (size_t i = 0; i < NumExportLIDs; i++) {
267  LO lid = ExportLIDs[i];
268  GO exp_pid = ExportPIDs[i];
269  for (auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
270  int pid_order = RemotePIDOrder[colind[j]];
271  if (pid_order != -1) {
272  GO gid = MyColMap->getGlobalElement(colind[j]); // Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
273  auto tpair = pidgidpair_t(exp_pid, gid);
274  pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
275  }
276  }
277  }
278  }
279 
280  {
281 #ifdef HAVE_TPETRA_MMM_TIMINGS
282  TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
283 #endif
284  int jj = 0;
285  for (auto&& ps : pidsets) {
286  auto s = ps.size();
287  RSB[jj] = Teuchos::arcp(new pidgidpair_t[s], 0, s, true);
288  std::copy(ps.begin(), ps.end(), RSB[jj]);
289  ReverseSendSizes[jj] = s;
290  ++jj;
291  }
292  }
293  } // end of set based packing.
294 
295  Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
296  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);
297  // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
298  const int mpi_tag_base_ = 3;
299 
300  int mpireq_idx = 0;
301  for (int i = 0; i < ProcsTo.size(); ++i) {
302  int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
303  int* thisrecv = (int*)(&ReverseRecvSizes[i]);
304  MPI_Request rawRequest = MPI_REQUEST_NULL;
305  MPI_Irecv(const_cast<int*>(thisrecv),
306  1,
307  MPI_INT,
308  ProcsTo[i],
309  Rec_Tag,
310  rawComm,
311  &rawRequest);
312  rawBreq[mpireq_idx++] = rawRequest;
313  }
314  for (int i = 0; i < ProcsFrom.size(); ++i) {
315  int Send_Tag = mpi_tag_base_ + MyPID;
316  int* mysend = (int*)(&ReverseSendSizes[i]);
317  MPI_Request rawRequest = MPI_REQUEST_NULL;
318  MPI_Isend(mysend,
319  1,
320  MPI_INT,
321  ProcsFrom[i],
322  Send_Tag,
323  rawComm,
324  &rawRequest);
325  rawBreq[mpireq_idx++] = rawRequest;
326  }
327  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
328 #ifdef HAVE_TPETRA_DEBUG
329  const int err1 =
330 #endif
331  MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
332  rawBstatus.getRawPtr());
333 
334 #ifdef HAVE_TPETRA_DEBUG
335  if (err1) {
336  errstr << MyPID << "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
337  error = true;
338  std::cerr << errstr.str() << std::flush;
339  }
340 #endif
341 
342  int totalexportpairrecsize = 0;
343  for (size_t i = 0; i < NumSends; ++i) {
344  totalexportpairrecsize += ReverseRecvSizes[i];
345 #ifdef HAVE_TPETRA_DEBUG
346  if (ReverseRecvSizes[i] < 0) {
347  errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
348  error = true;
349  }
350 #endif
351  }
352  Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize, true);
353  int offset = 0;
354  mpireq_idx = 0;
355  for (int i = 0; i < ProcsTo.size(); ++i) {
356  int recv_data_size = ReverseRecvSizes[i] * 2;
357  int recvData_MPI_Tag = mpi_tag_base_ * 2 + ProcsTo[i];
358  MPI_Request rawRequest = MPI_REQUEST_NULL;
359  GO* rec_bptr = (GO*)(&AllReverseRecv[offset]);
360  offset += ReverseRecvSizes[i];
361  MPI_Irecv(rec_bptr,
362  recv_data_size,
363  ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
364  ProcsTo[i],
365  recvData_MPI_Tag,
366  rawComm,
367  &rawRequest);
368  rawBreq[mpireq_idx++] = rawRequest;
369  }
370  for (int ii = 0; ii < ProcsFrom.size(); ++ii) {
371  GO* send_bptr = (GO*)(RSB[ii].getRawPtr());
372  MPI_Request rawSequest = MPI_REQUEST_NULL;
373  int send_data_size = ReverseSendSizes[ii] * 2; // 2 == count of pair
374  int sendData_MPI_Tag = mpi_tag_base_ * 2 + MyPID;
375  MPI_Isend(send_bptr,
376  send_data_size,
377  ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
378  ProcsFrom[ii],
379  sendData_MPI_Tag,
380  rawComm,
381  &rawSequest);
382 
383  rawBreq[mpireq_idx++] = rawSequest;
384  }
385 #ifdef HAVE_TPETRA_DEBUG
386  const int err =
387 #endif
388  MPI_Waitall(rawBreq.size(),
389  rawBreq.getRawPtr(),
390  rawBstatus.getRawPtr());
391 #ifdef HAVE_TPETRA_DEBUG
392  if (err) {
393  errstr << MyPID << "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
394  error = true;
395  std::cerr << errstr.str() << std::flush;
396  }
397 #endif
398  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
399 
400  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
401  // don't resize to remove non-unique, just use the end-of-unique iterator
402  if (AllReverseRecv.begin() == newEndOfPairs) return;
403  int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
404  auto rPIDs = Teuchos::arcp(new int[ARRsize], 0, ARRsize, true);
405  auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize], 0, ARRsize, true);
406 
407  int tsize = 0;
408  for (auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
409  if ((int)(itr->first) != MyPID) {
410  rPIDs[tsize] = (int)itr->first;
411  LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
412  rLIDs[tsize] = lid;
413  tsize++;
414  }
415  }
416 
417  type3PIDs = rPIDs.persistingView(0, tsize);
418  type3LIDs = rLIDs.persistingView(0, tsize);
419 
420  if (error) {
421  std::cerr << errstr.str() << std::flush;
422  comm->barrier();
423  comm->barrier();
424  comm->barrier();
425  MPI_Abort(MPI_COMM_WORLD, -1);
426  }
427 #endif
428 }
429 
430 // Note: This should get merged with the other Tpetra sort routines eventually.
431 template <typename Scalar, typename Ordinal>
432 void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
433  const Teuchos::ArrayView<Ordinal>& CRS_colind,
434  const Teuchos::ArrayView<Scalar>& CRS_vals) {
435  // For each row, sort column entries from smallest to largest.
436  // Use shell sort, which is fast if the indices are already (nearly) sorted.
437  // Code copied from Epetra_CrsMatrix::SortEntries()
438  size_t NumRows = CRS_rowptr.size() - 1;
439  size_t nnz = CRS_colind.size();
440 
441  const bool permute_values_array = CRS_vals.size() > 0;
442 
443  for (size_t i = 0; i < NumRows; i++) {
444  size_t start = CRS_rowptr[i];
445  if (start >= nnz) continue;
446 
447  size_t NumEntries = CRS_rowptr[i + 1] - start;
448  Teuchos::ArrayRCP<Scalar> locValues;
449  if (permute_values_array)
450  locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
451  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
452 
453  Ordinal n = NumEntries;
454  Ordinal m = 1;
455  while (m < n) m = m * 3 + 1;
456  m /= 3;
457 
458  while (m > 0) {
459  Ordinal max = n - m;
460  for (Ordinal j = 0; j < max; j++) {
461  for (Ordinal k = j; k >= 0; k -= m) {
462  if (locIndices[k + m] >= locIndices[k])
463  break;
464  if (permute_values_array) {
465  Scalar dtemp = locValues[k + m];
466  locValues[k + m] = locValues[k];
467  locValues[k] = dtemp;
468  }
469  Ordinal itemp = locIndices[k + m];
470  locIndices[k + m] = locIndices[k];
471  locIndices[k] = itemp;
472  }
473  }
474  m = m / 3;
475  }
476  }
477 }
478 
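// Worked example (illustrative): for a row whose column indices are {5, 1, 3}
// with values {a, b, c}, the shell sort above leaves the indices as {1, 3, 5}
// and permutes the values to {b, c, a}.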
479 template <typename Ordinal>
480 void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
481  const Teuchos::ArrayView<Ordinal>& CRS_colind) {
482  // Generate dummy values array
483  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
484  sortCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
485 }
486 
487 namespace Impl {
488 
489 template <class RowOffsetsType, class ColumnIndicesType, class ValuesType>
490 class SortCrsEntries {
491  private:
492  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
493  typedef typename ValuesType::non_const_value_type scalar_type;
494 
495  public:
496  SortCrsEntries(const RowOffsetsType& ptr,
497  const ColumnIndicesType& ind,
498  const ValuesType& val)
499  : ptr_(ptr)
500  , ind_(ind)
501  , val_(val) {
502  static_assert(std::is_signed<ordinal_type>::value,
503  "The type of each "
504  "column index -- that is, the type of each entry of ind "
505  "-- must be signed in order for this functor to work.");
506  }
507 
508  KOKKOS_FUNCTION void operator()(const size_t i) const {
509  const size_t nnz = ind_.extent(0);
510  const size_t start = ptr_(i);
511  const bool permute_values_array = val_.extent(0) > 0;
512 
513  if (start < nnz) {
514  const size_t NumEntries = ptr_(i + 1) - start;
515 
516  const ordinal_type n = static_cast<ordinal_type>(NumEntries);
517  ordinal_type m = 1;
518  while (m < n) m = m * 3 + 1;
519  m /= 3;
520 
521  while (m > 0) {
522  ordinal_type max = n - m;
523  for (ordinal_type j = 0; j < max; j++) {
524  for (ordinal_type k = j; k >= 0; k -= m) {
525  const size_t sk = start + k;
526  if (ind_(sk + m) >= ind_(sk)) {
527  break;
528  }
529  if (permute_values_array) {
530  const scalar_type dtemp = val_(sk + m);
531  val_(sk + m) = val_(sk);
532  val_(sk) = dtemp;
533  }
534  const ordinal_type itemp = ind_(sk + m);
535  ind_(sk + m) = ind_(sk);
536  ind_(sk) = itemp;
537  }
538  }
539  m = m / 3;
540  }
541  }
542  }
543 
544  static void
545  sortCrsEntries(const RowOffsetsType& ptr,
546  const ColumnIndicesType& ind,
547  const ValuesType& val) {
548  // For each row, sort column entries from smallest to largest.
549  // Use shell sort, which is fast if the indices are already (nearly) sorted.
550  // Code copied from Epetra_CrsMatrix::SortEntries()
551  // NOTE: This should not be taken as a particularly efficient way to sort
552  // rows of matrices in parallel. But it is correct, so that's something.
553  if (ptr.extent(0) == 0) {
554  return; // no rows, so nothing to sort
555  }
556  const size_t NumRows = ptr.extent(0) - 1;
557 
558  typedef size_t index_type; // what this function was using; not my choice
559  typedef typename ValuesType::execution_space execution_space;
560  // Specify RangePolicy explicitly, in order to use correct execution
561  // space. See #1345.
562  typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
563  Kokkos::parallel_for("sortCrsEntries", range_type(0, NumRows),
564  SortCrsEntries(ptr, ind, val));
565  }
566 
567  private:
568  RowOffsetsType ptr_;
569  ColumnIndicesType ind_;
570  ValuesType val_;
571 };
572 
573 } // namespace Impl
574 
575 template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
576 void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
577  const colind_array_type& CRS_colind,
578  const vals_array_type& CRS_vals) {
579  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
580  vals_array_type>::sortCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
581 }
582 
583 template <typename rowptr_array_type, typename colind_array_type>
584 void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
585  const colind_array_type& CRS_colind) {
586  // Generate dummy values array
587  typedef typename colind_array_type::execution_space execution_space;
588  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
589  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
590  scalar_view_type CRS_vals;
591  sortCrsEntries<rowptr_array_type, colind_array_type,
592  scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
593 }
594 
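// Example usage of the Kokkos::View overloads above (an illustrative sketch;
// the sizes and the fill step are placeholders invented for this example):
//
// \code
//   using device_type = Tpetra::Map<>::device_type;
//   const size_t numRows = 100, numEntries = 500;
//
//   Kokkos::View<size_t*, device_type> rowptr("rowptr", numRows + 1);
//   Kokkos::View<int*, device_type>    colind("colind", numEntries);
//   Kokkos::View<double*, device_type> vals("vals", numEntries);
//   // ... fill rowptr, colind, and vals on device ...
//
//   // Sorts each row's entries in parallel on device_type's execution space.
//   Tpetra::Import_Util::sortCrsEntries(rowptr, colind, vals);
// \endcode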
595 // Note: This should get merged with the other Tpetra sort routines eventually.
596 template <typename Scalar, typename Ordinal>
597 void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
598  const Teuchos::ArrayView<Ordinal>& CRS_colind,
599  const Teuchos::ArrayView<Scalar>& CRS_vals) {
600  // For each row, sort column entries from smallest to largest,
601  // merging column ids that are identical by adding their values. Use shell
602  // sort, which is fast if the indices are already (nearly) sorted.
603  // Code copied from Epetra_CrsMatrix::SortEntries()
604 
605  if (CRS_rowptr.size() == 0) {
606  return; // no rows, so nothing to sort
607  }
608  const size_t NumRows = CRS_rowptr.size() - 1;
609  const size_t nnz = CRS_colind.size();
610  size_t new_curr = CRS_rowptr[0];
611  size_t old_curr = CRS_rowptr[0];
612 
613  const bool permute_values_array = CRS_vals.size() > 0;
614 
615  for (size_t i = 0; i < NumRows; i++) {
616  const size_t old_rowptr_i = CRS_rowptr[i];
617  CRS_rowptr[i] = old_curr;
618  if (old_rowptr_i >= nnz) continue;
619 
620  size_t NumEntries = CRS_rowptr[i + 1] - old_rowptr_i;
621  Teuchos::ArrayRCP<Scalar> locValues;
622  if (permute_values_array)
623  locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
624  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
625 
626  // Sort phase
627  Ordinal n = NumEntries;
628  Ordinal m = n / 2;
629 
630  while (m > 0) {
631  Ordinal max = n - m;
632  for (Ordinal j = 0; j < max; j++) {
633  for (Ordinal k = j; k >= 0; k -= m) {
634  if (locIndices[k + m] >= locIndices[k])
635  break;
636  if (permute_values_array) {
637  Scalar dtemp = locValues[k + m];
638  locValues[k + m] = locValues[k];
639  locValues[k] = dtemp;
640  }
641  Ordinal itemp = locIndices[k + m];
642  locIndices[k + m] = locIndices[k];
643  locIndices[k] = itemp;
644  }
645  }
646  m = m / 2;
647  }
648 
649  // Merge & shrink
650  for (size_t j = old_rowptr_i; j < CRS_rowptr[i + 1]; j++) {
651  if (j > old_rowptr_i && CRS_colind[j] == CRS_colind[new_curr - 1]) {
652  if (permute_values_array) CRS_vals[new_curr - 1] += CRS_vals[j];
653  } else if (new_curr == j) {
654  new_curr++;
655  } else {
656  CRS_colind[new_curr] = CRS_colind[j];
657  if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
658  new_curr++;
659  }
660  }
661  old_curr = new_curr;
662  }
663 
664  CRS_rowptr[NumRows] = new_curr;
665 }
666 
667 template <typename Ordinal>
668 void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
669  const Teuchos::ArrayView<Ordinal>& CRS_colind) {
670  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
671  return sortAndMergeCrsEntries<Tpetra::Details::DefaultTypes::scalar_type, Ordinal>(CRS_rowptr, CRS_colind, CRS_vals);
672 }
673 
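// Worked example (illustrative): given
//   rowptr = {0, 4, 6}, colind = {2, 0, 2, 1, 1, 1}, vals = {1, 2, 3, 4, 5, 6},
// sortAndMergeCrsEntries sorts each row and sums the values of duplicate
// column indices, leaving
//   rowptr = {0, 3, 4}, colind = {0, 1, 2, 1, ...}, vals = {2, 4, 4, 11, ...},
// where entries at and beyond rowptr[NumRows] = 4 are leftover storage.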
674 template <class rowptr_view_type, class colind_view_type, class vals_view_type>
675 void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
676  colind_view_type& CRS_colind,
677  vals_view_type& CRS_vals) {
678  using execution_space = typename vals_view_type::execution_space;
679 
680  auto CRS_rowptr_in = CRS_rowptr;
681  auto CRS_colind_in = CRS_colind;
682  auto CRS_vals_in = CRS_vals;
683 
684  KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
685  colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
686  CRS_rowptr, CRS_colind, CRS_vals);
687 }
688 
689 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
690 void lowCommunicationMakeColMapAndReindexSerial(const Teuchos::ArrayView<const size_t>& rowptr,
691  const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
692  const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
693  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
694  const Teuchos::ArrayView<const int>& owningPIDs,
695  Teuchos::Array<int>& remotePIDs,
696  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
697  using Teuchos::rcp;
698  typedef LocalOrdinal LO;
699  typedef GlobalOrdinal GO;
700  typedef Tpetra::global_size_t GST;
701  typedef Tpetra::Map<LO, GO, Node> map_type;
702  const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
703 
704  // The domainMap is an RCP because there is a shortcut for a
705  // (common) special case to return the columnMap = domainMap.
706  const map_type& domainMap = *domainMapRCP;
707 
708  // Scan all column indices and sort into two groups:
709  // Local: those whose GID matches a GID of the domain map on this processor and
710  // Remote: All others.
711  const size_t numDomainElements = domainMap.getLocalNumElements();
712  Teuchos::Array<bool> LocalGIDs;
713  if (numDomainElements > 0) {
714  LocalGIDs.resize(numDomainElements, false); // Assume domain GIDs are not local
715  }
716 
717  // In principle it is good to have RemoteGIDs and RemoteGIDList be as
718  // long as the number of remote GIDs on this processor, but this
719  // would require two passes through the column IDs, so we make it
720  // the max of 100 and the number of block rows.
721  //
722  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
723  // most INT_MAX entries, but it's possible to have more rows than
724  // that (if size_t is 64 bits and int is 32 bits).
725  const size_t numMyRows = rowptr.size() - 1;
726  const int hashsize = std::max(static_cast<int>(numMyRows), 100);
727 
728  Tpetra::Details::HashTable<GO, LO> RemoteGIDs(hashsize);
729  Teuchos::Array<GO> RemoteGIDList;
730  RemoteGIDList.reserve(hashsize);
731  Teuchos::Array<int> PIDList;
732  PIDList.reserve(hashsize);
733 
734  // Here we start using the *LocalOrdinal* colind_LID array. This is
735  // safe even if both columnIndices arrays are actually the same
736  // (because LocalOrdinal == GO). For *local* GIDs, we set
737  // colind_LID to their LID in the domainMap. For *remote*
738  // GIDs, we set colind_LID to (numDomainElements + NumRemoteColGIDs)
739  // before incrementing the remote count. These numberings will
740  // be separate because no local LID is greater than
741  // numDomainElements.
742 
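  // For example (illustrative): with numDomainElements = 4, the first new
  // remote GID encountered is temporarily tagged with LID 4, the second with
  // LID 5, and so on; the reindexing pass at the end of this function maps
  // these tags into the final column Map ordering.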
743  size_t NumLocalColGIDs = 0;
744  LO NumRemoteColGIDs = 0;
745  for (size_t i = 0; i < numMyRows; ++i) {
746  for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
747  const GO GID = colind_GID[j];
748  // Check if GID matches a row GID
749  const LO LID = domainMap.getLocalElement(GID);
750  if (LID != -1) {
751  const bool alreadyFound = LocalGIDs[LID];
752  if (!alreadyFound) {
753  LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
754  NumLocalColGIDs++;
755  }
756  colind_LID[j] = LID;
757  } else {
758  const LO hash_value = RemoteGIDs.get(GID);
759  if (hash_value == -1) { // This means its a new remote GID
760  const int PID = owningPIDs[j];
761  TEUCHOS_TEST_FOR_EXCEPTION(
762  PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
763  "PID is owned.");
764  colind_LID[j] = static_cast<LO>(numDomainElements + NumRemoteColGIDs);
765  RemoteGIDs.add(GID, NumRemoteColGIDs);
766  RemoteGIDList.push_back(GID);
767  PIDList.push_back(PID);
768  NumRemoteColGIDs++;
769  } else {
770  colind_LID[j] = static_cast<LO>(numDomainElements + hash_value);
771  }
772  }
773  }
774  }
775 
776  // Possible short-circuit: If all domain map GIDs are present as
777  // column indices, then set ColMap=domainMap and quit.
778  if (domainMap.getComm()->getSize() == 1) {
779  // Sanity check: When there is only one process, there can be no
780  // remoteGIDs.
781  TEUCHOS_TEST_FOR_EXCEPTION(
782  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
783  "process in the domain Map's communicator, which means that there are no "
784  "\"remote\" indices. Nevertheless, some column indices are not in the "
785  "domain Map.");
786  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
787  // In this case, we just use the domainMap's indices, which is,
788  // not coincidentally, what we clobbered colind with up above
789  // anyway. No further reindexing is needed.
790  colMap = domainMapRCP;
791  return;
792  }
793  }
794 
795  // Now build the array containing column GIDs
796  // Build back end, containing remote GIDs, first
797  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
798  Teuchos::Array<GO> ColIndices;
799  GO* RemoteColIndices = NULL;
800  if (numMyCols > 0) {
801  ColIndices.resize(numMyCols);
802  if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
803  RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
804  }
805  }
806 
807  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
808  RemoteColIndices[i] = RemoteGIDList[i];
809  }
810 
811  // Build permute array for *remote* reindexing.
812  Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
813  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
814  RemotePermuteIDs[i] = i;
815  }
816 
817  // Sort External column indices so that all columns coming from a
818  // given remote processor are contiguous. This is a sort with two
819  // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
820  Tpetra::sort3(PIDList.begin(), PIDList.end(),
821  ColIndices.begin() + NumLocalColGIDs,
822  RemotePermuteIDs.begin());
823 
824  // Stash the RemotePIDs.
825  //
826  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
827  // we'd call it here.
828  remotePIDs = PIDList;
829 
830  // Sort external column indices so that columns from a given remote
831  // processor are not only contiguous but also in ascending
832  // order. NOTE: I don't know if the number of externals associated
833  // with a given remote processor is known at this point ... so I
834  // count them here.
835 
836  // NTS: Only sort the RemoteColIndices this time...
837  LO StartCurrent = 0, StartNext = 1;
838  while (StartNext < NumRemoteColGIDs) {
839  if (PIDList[StartNext] == PIDList[StartNext - 1]) {
840  StartNext++;
841  } else {
842  Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
843  ColIndices.begin() + NumLocalColGIDs + StartNext,
844  RemotePermuteIDs.begin() + StartCurrent);
845  StartCurrent = StartNext;
846  StartNext++;
847  }
848  }
849  Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
850  ColIndices.begin() + NumLocalColGIDs + StartNext,
851  RemotePermuteIDs.begin() + StartCurrent);
852 
853  // Reverse the permutation to get the information we actually care about
854  Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
855  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
856  ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
857  }
858 
859  // Build permute array for *local* reindexing.
860  bool use_local_permute = false;
861  Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);
862 
863  // Now fill front end. Two cases:
864  //
865  // (1) If the number of Local column GIDs is the same as the number
866  // of Local domain GIDs, we can simply read the domain GIDs into
867  // the front part of ColIndices, otherwise
868  //
869  // (2) We step through the GIDs of the domainMap, checking to see if
870  // each domain GID is a column GID. We want to do this to
871  // maintain a consistent ordering of GIDs between the columns
872  // and the domain.
873  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
874  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
875  if (NumLocalColGIDs > 0) {
876  // Load Global Indices into first numMyCols elements column GID list
877  std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
878  ColIndices.begin());
879  }
880  } else {
881  LO NumLocalAgain = 0;
882  use_local_permute = true;
883  for (size_t i = 0; i < numDomainElements; ++i) {
884  if (LocalGIDs[i]) {
885  LocalPermuteIDs[i] = NumLocalAgain;
886  ColIndices[NumLocalAgain++] = domainGlobalElements[i];
887  }
888  }
889  TEUCHOS_TEST_FOR_EXCEPTION(
890  static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
891  std::runtime_error, prefix << "Local ID count test failed.");
892  }
893 
894  // Make column Map
895  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
896  colMap = rcp(new map_type(minus_one, ColIndices, domainMap.getIndexBase(),
897  domainMap.getComm()));
898 
899  // Low-cost reindex of the matrix
900  for (size_t i = 0; i < numMyRows; ++i) {
901  for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
902  const LO ID = colind_LID[j];
903  if (static_cast<size_t>(ID) < numDomainElements) {
904  if (use_local_permute) {
905  colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
906  }
907  // In the case where use_local_permute==false, we just copy
908  // the DomainMap's ordering, which it so happens is what we
909  // put in colind_LID to begin with.
910  } else {
911  colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
912  }
913  }
914  }
915 }
916 
917 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
918 void lowCommunicationMakeColMapAndReindex(
919  const Teuchos::ArrayView<const size_t>& rowptr,
920  const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
921  const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
922  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
923  const Teuchos::ArrayView<const int>& owningPIDs,
924  Teuchos::Array<int>& remotePIDs,
925  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
926  using Teuchos::rcp;
927  typedef LocalOrdinal LO;
928  typedef GlobalOrdinal GO;
929  typedef Tpetra::global_size_t GST;
930  typedef Tpetra::Map<LO, GO, Node> map_type;
931  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
932 
933  typedef typename Node::device_type DT;
934  using execution_space = typename DT::execution_space;
935  execution_space exec;
936  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
937  typedef typename map_type::local_map_type local_map_type;
938 
939  // Create device mirror and host mirror views from the function parameters.
940  // When Kokkos::Views are passed in directly (see the overload below) instead of Teuchos::ArrayViews, these copies can be avoided.
941  auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
942  auto rowptr_view = Details::create_mirror_view_from_raw_host_array(exec, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
943  auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
944  auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(exec, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
945 
946  typename decltype(colind_LID_view)::host_mirror_type colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
947  typename decltype(colind_GID_view)::host_mirror_type colind_GID_host(colind_GID.getRawPtr(), colind_GID.size());
948 
949  Kokkos::deep_copy(colind_LID_view, colind_LID_host);
950  Kokkos::deep_copy(colind_GID_view, colind_GID_host);
951 
952  // The domainMap is an RCP because there is a shortcut for a
953  // (common) special case to return the columnMap = domainMap.
954  const map_type& domainMap = *domainMapRCP;
955 
956  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID.size());
957  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID.size());
958 
959  const size_t numMyRows = rowptr.size() - 1;
960  local_map_type domainMap_local = domainMap.getLocalMap();
961 
962  const size_t numDomainElements = domainMap.getLocalNumElements();
963  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
964  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
965 
966  size_t NumLocalColGIDs = 0;
967 
968  // Scan all column indices and sort into two groups:
969  // Local: those whose GID matches a GID of the domain map on this processor and
970  // Remote: All others.
971  // Kokkos::parallel_reduce sums up NumLocalColGIDs, while the size of the remote GIDs map gives NumRemoteColGIDs
972  Kokkos::parallel_reduce(
973  team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, size_t& update) {
974  const int i = member.league_rank();
975  size_t NumLocalColGIDs_temp = 0;
976  size_t rowptr_start = rowptr_view[i];
977  size_t rowptr_end = rowptr_view[i + 1];
978  Kokkos::parallel_reduce(
979  Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t& innerUpdate) {
980  const GO GID = colind_GID_view[j];
981  // Check if GID matches a row GID in local domain map
982  const LO LID = domainMap_local.getLocalElement(GID);
983  if (LID != -1) {
984  auto outcome = LocalGIDs_view_map.insert(LID);
985  // Fresh insert
986  if (outcome.success()) {
987  LocalGIDs_view[LID] = true;
988  innerUpdate++;
989  }
990  } else {
991  const int PID = owningPIDs_view[j];
992  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
993  if (outcome.success() && PID == -1) {
994  Kokkos::abort("Cannot figure out if ID is owned.\n");
995  }
996  }
997  },
998  NumLocalColGIDs_temp);
999  if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
1000  },
1001  NumLocalColGIDs);
1002 
1003  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1004 
1005  Kokkos::View<int*, DT> PIDList_view("PIDList", NumRemoteColGIDs);
1006  auto PIDList_host = Kokkos::create_mirror_view(PIDList_view);
1007 
1008  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1009  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1010 
1011  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1012  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
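  // For example (illustrative): if the hash map's valid slots are at capacity
  // indices 3, 7, and 9, the scan below writes their (GID, PID) entries to
  // positions 0, 1, and 2 of RemoteGIDList_view / PIDList_view.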
1013  Kokkos::parallel_scan(
1014  Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1015  if (final && RemoteGIDs_view_map.valid_at(i)) {
1016  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1017  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1018  }
1019  if (RemoteGIDs_view_map.valid_at(i)) {
1020  update += 1;
1021  }
1022  });
1023 
1024  // Possible short-circuit: If all domain map GIDs are present as
1025  // column indices, then set ColMap=domainMap and quit.
1026  if (domainMap.getComm()->getSize() == 1) {
1027  // Sanity check: When there is only one process, there can be no
1028  // remoteGIDs.
1029  TEUCHOS_TEST_FOR_EXCEPTION(
1030  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1031  "process in the domain Map's communicator, which means that there are no "
1032  "\"remote\" indices. Nevertheless, some column indices are not in the "
1033  "domain Map.");
1034  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1035  // In this case, we just use the domainMap's indices, which is,
1036  // not coincidentally, what we clobbered colind with up above
1037  // anyway. No further reindexing is needed.
1038  colMap = domainMapRCP;
1039 
1040  // Fill out local colMap (which should only contain local GIDs)
1041  auto localColMap = colMap->getLocalMap();
1042  Kokkos::parallel_for(
1043  Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1044  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1045  });
1046  Kokkos::deep_copy(execution_space(), colind_LID_host, colind_LID_view);
1047  return;
1048  }
1049  }
1050 
1051  // Now build the array containing column GIDs
1052  // Build back end, containing remote GIDs, first
1053  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1054  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1055 
1056  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1057  if (NumRemoteColGIDs > 0) {
1058  if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
1059  Kokkos::parallel_for(
1060  Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1061  ColIndices_view[NumLocalColGIDs + i] = RemoteGIDList_view[i];
1062  });
1063  }
1064 
1065  // Find the largest PID for bin sorting purposes
1066  int PID_max = 0;
1067  Kokkos::parallel_reduce(
1068  Kokkos::RangePolicy<execution_space>(0, PIDList_host.size()), KOKKOS_LAMBDA(const int i, int& max) {
1069  if (max < PIDList_view[i]) max = PIDList_view[i];
1070  },
1071  Kokkos::Max<int>(PID_max));
1072 
1073  using KeyViewTypePID = decltype(PIDList_view);
1074  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1075 
1076  // Make a subview of ColIndices for remote GID sorting
1077  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1078 
1079  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1080  BinSortOpPID binOp2(PID_max + 1, 0, PID_max);
1081 
1082  // Sort External column indices so that all columns coming from a
1083  // given remote processor are contiguous. This is a sort with one
1084  // auxiliary array: RemoteColIndices
1085  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1086  bin_sort2.create_permute_vector(exec);
1087  bin_sort2.sort(exec, PIDList_view);
1088  bin_sort2.sort(exec, ColIndices_subview);
1089 
1090  // Deep copy back from device to host
1091  Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1092 
1093  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1094  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
1095  // we'd call it here.
1096 
1097  exec.fence("fence before setting PIDList");
1098  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1099  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1100  PIDList[i] = PIDList_host[i];
1101  }
1102 
1103  remotePIDs = PIDList;
1104 
1105  // Sort external column indices so that columns from a given remote
1106  // processor are not only contiguous but also in ascending
1107  // order. NOTE: I don't know if the number of externals associated
1108  // with a given remote processor is known at this point ... so I
1109  // count them here.
1110  LO StartCurrent = 0, StartNext = 1;
1111  while (StartNext < NumRemoteColGIDs) {
1112  if (PIDList_host[StartNext] == PIDList_host[StartNext - 1]) {
1113  StartNext++;
1114  } else {
1115  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1116  StartCurrent = StartNext;
1117  StartNext++;
1118  }
1119  }
1120 
1121  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1122  }
1123 
1124  // Build permute array for *local* reindexing.
1125 
1126  // Now fill front end. Two cases:
1127  //
1128  // (1) If the number of Local column GIDs is the same as the number
1129  // of Local domain GIDs, we can simply read the domain GIDs into
1130  // the front part of ColIndices, otherwise
1131  //
1132  // (2) We step through the GIDs of the domainMap, checking to see if
1133  // each domain GID is a column GID. We want to do this to
1134  // maintain a consistent ordering of GIDs between the columns
1135  // and the domain.
1136  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1137  if (NumLocalColGIDs > 0) {
1138  // Load Global Indices into first numMyCols elements column GID list
1139  Kokkos::parallel_for(
1140  Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1141  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1142  });
1143  }
1144  } else {
1145  // This part isn't actually tested in the unit tests
1146  LO NumLocalAgain = 0;
1147  Kokkos::parallel_scan(
1148  Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1149  if (final && LocalGIDs_view[i]) {
1150  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1151  }
1152  if (LocalGIDs_view[i]) {
1153  update++;
1154  }
1155  },
1156  NumLocalAgain);
1157 
1158  TEUCHOS_TEST_FOR_EXCEPTION(
1159  static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1160  std::runtime_error, prefix << "Local ID count test failed.");
1161  }
1162 
1163  // Make column Map
1164  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
1165 
1166  colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
1167  domainMap.getComm()));
1168 
1169  // Fill out colind_LID using local map
1170  auto localColMap = colMap->getLocalMap();
1171  Kokkos::parallel_for(
1172  Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1173  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1174  });
1175 
1176  // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array).
1177  // When colind_LID becomes a Kokkos::View, we can delete this.
1178  Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
1179 }
1180 
1181 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1182 void lowCommunicationMakeColMapAndReindex(
1183  const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
1184  const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
1185  const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
1186  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
1187  const Kokkos::View<int*, typename Node::device_type> owningPIDs_view,
1188  Teuchos::Array<int>& remotePIDs,
1189  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
1190  using Teuchos::rcp;
1191  typedef LocalOrdinal LO;
1192  typedef GlobalOrdinal GO;
1193  typedef Tpetra::global_size_t GST;
1194  typedef Tpetra::Map<LO, GO, Node> map_type;
1195  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1196 
1197  typedef typename Node::device_type DT;
1198  using execution_space = typename DT::execution_space;
1199  execution_space exec;
1200  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1201  typedef typename map_type::local_map_type local_map_type;
1202 
1203  // The domainMap is an RCP because there is a shortcut for a
1204  // (common) special case to return the columnMap = domainMap.
1205  const map_type& domainMap = *domainMapRCP;
1206 
1207  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID_view.size());
1208  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID_view.size());
1209 
1210  const size_t numMyRows = rowptr_view.size() - 1;
1211  local_map_type domainMap_local = domainMap.getLocalMap();
1212 
1213  const size_t numDomainElements = domainMap.getLocalNumElements();
1214  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1215  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1216 
1217  size_t NumLocalColGIDs = 0;
1218 
1219  // Scan all column indices and sort into two groups:
1220  // Local: those whose GID matches a GID of the domain map on this processor and
1221  // Remote: All others.
1222  // Kokkos::parallel_reduce sums up NumLocalColGIDs, while the size of the remote GIDs map gives NumRemoteColGIDs
1223  Kokkos::parallel_reduce(
1224  team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, size_t& update) {
1225  const int i = member.league_rank();
1226  size_t NumLocalColGIDs_temp = 0;
1227  size_t rowptr_start = rowptr_view[i];
1228  size_t rowptr_end = rowptr_view[i + 1];
1229  Kokkos::parallel_reduce(
1230  Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t& innerUpdate) {
1231  const GO GID = colind_GID_view[j];
1232  // Check if GID matches a row GID in local domain map
1233  const LO LID = domainMap_local.getLocalElement(GID);
1234  if (LID != -1) {
1235  auto outcome = LocalGIDs_view_map.insert(LID);
1236  // Fresh insert
1237  if (outcome.success()) {
1238  LocalGIDs_view[LID] = true;
1239  innerUpdate++;
1240  }
1241  } else {
1242  const int PID = owningPIDs_view[j];
1243  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1244  if (outcome.success() && PID == -1) {
1245  Kokkos::abort("Cannot figure out if ID is owned.\n");
1246  }
1247  }
1248  },
1249  NumLocalColGIDs_temp);
1250  if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
1251  },
1252  NumLocalColGIDs);
1253 
1254  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1255 
1256  Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);
1257 
1258  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1259  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1260 
1261  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1262  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1263  Kokkos::parallel_scan(
1264  Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1265  if (final && RemoteGIDs_view_map.valid_at(i)) {
1266  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1267  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1268  }
1269  if (RemoteGIDs_view_map.valid_at(i)) {
1270  update += 1;
1271  }
1272  });
1273 
1274  // Possible short-circuit: If all domain map GIDs are present as
1275  // column indices, then set ColMap=domainMap and quit.
1276  if (domainMap.getComm()->getSize() == 1) {
1277  // Sanity check: When there is only one process, there can be no
1278  // remoteGIDs.
1279  TEUCHOS_TEST_FOR_EXCEPTION(
1280  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1281  "process in the domain Map's communicator, which means that there are no "
1282  "\"remote\" indices. Nevertheless, some column indices are not in the "
1283  "domain Map.");
1284  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1285  // In this case, we just use the domainMap's indices, which is,
1286  // not coincidentally, what we clobbered colind with up above
1287  // anyway. No further reindexing is needed.
1288  colMap = domainMapRCP;
1289 
1290  // Fill out local colMap (which should only contain local GIDs)
1291  auto localColMap = colMap->getLocalMap();
1292  Kokkos::parallel_for(
1293  Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1294  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1295  });
1296  return;
1297  }
1298  }
1299 
1300  // Now build the array containing column GIDs
1301  // Build back end, containing remote GIDs, first
1302  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1303  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1304 
1305  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1306  if (NumRemoteColGIDs > 0) {
1307  if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
1308  Kokkos::parallel_for(
1309  Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1310  ColIndices_view[NumLocalColGIDs + i] = RemoteGIDList_view[i];
1311  });
1312  }
1313 
1314  // Find the largest PID for bin sorting purposes
1315  int PID_max = 0;
1316  Kokkos::parallel_reduce(
1317  Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
1318  if (max < PIDList_view[i]) max = PIDList_view[i];
1319  },
1320  Kokkos::Max<int>(PID_max));
1321 
1322  using KeyViewTypePID = decltype(PIDList_view);
1323  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1324 
1325  // Make a subview of ColIndices for remote GID sorting
1326  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1327 
1328  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1329  BinSortOpPID binOp2(PID_max + 1, 0, PID_max);
1330 
1331  // Sort External column indices so that all columns coming from a
1332  // given remote processor are contiguous. This is a sort with one
1333  // auxiliary array: RemoteColIndices
1334  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1335  bin_sort2.create_permute_vector(exec);
1336  bin_sort2.sort(exec, PIDList_view);
1337  bin_sort2.sort(exec, ColIndices_subview);
1338 
1339  // Deep copy back from device to host
1340  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1341  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1342  Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
1343  Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1344  exec.fence();
1345 
1346  remotePIDs = PIDList;
1347 
1348  // Sort external column indices so that columns from a given remote
1349  // processor are not only contiguous but also in ascending
1350  // order. NOTE: I don't know if the number of externals associated
1351  // with a given remote processor is known at this point ... so I
1352  // count them here.
1353  LO StartCurrent = 0, StartNext = 1;
1354  while (StartNext < NumRemoteColGIDs) {
1355  if (PIDList_host[StartNext] == PIDList_host[StartNext - 1]) {
1356  StartNext++;
1357  } else {
1358  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1359  StartCurrent = StartNext;
1360  StartNext++;
1361  }
1362  }
1363 
1364  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1365  }
1366 
1367  // Build permute array for *local* reindexing.
1368 
1369  // Now fill front end. Two cases:
1370  //
1371  // (1) If the number of Local column GIDs is the same as the number
1372  // of Local domain GIDs, we can simply read the domain GIDs into
1373  // the front part of ColIndices, otherwise
1374  //
1375  // (2) We step through the GIDs of the domainMap, checking to see if
1376  // each domain GID is a column GID. We want to do this to
1377  // maintain a consistent ordering of GIDs between the columns
1378  // and the domain.
1379  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1380  if (NumLocalColGIDs > 0) {
1381  // Load Global Indices into first numMyCols elements column GID list
1382  Kokkos::parallel_for(
1383  Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1384  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1385  });
1386  }
1387  } else {
1388  // This part isn't actually tested in the unit tests
1389  LO NumLocalAgain = 0;
1390  Kokkos::parallel_scan(
1391  Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1392  if (final && LocalGIDs_view[i]) {
1393  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1394  }
1395  if (LocalGIDs_view[i]) {
1396  update++;
1397  }
1398  },
1399  NumLocalAgain);
1400 
1401  TEUCHOS_TEST_FOR_EXCEPTION(
1402  static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1403  std::runtime_error, prefix << "Local ID count test failed.");
1404  }
1405 
1406  // Make column Map
1407  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
1408 
1409  colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
1410  domainMap.getComm()));
1411 
1412  // Fill out colind_LID using local map
1413  auto localColMap = colMap->getLocalMap();
1414  Kokkos::parallel_for(
1415  Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1416  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1417  });
1418 }
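The net effect of the column-Map construction above is a specific GID ordering: the domain GIDs that appear as column indices come first, in domain-Map order, followed by the remote GIDs grouped by owning PID and sorted in ascending order within each PID group. The standalone, host-only sketch below (not part of this header; buildColumnGidOrder and its arguments are hypothetical names) reproduces that ordering with the standard library in place of Kokkos::BinSort and the per-run Kokkos::sort calls:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using GO = std::int64_t;  // stand-in for GlobalOrdinal

// remoteByPid holds (owning PID, GID) pairs for the remote (non-domain) column GIDs.
std::vector<GO> buildColumnGidOrder(const std::vector<GO>& localDomainGIDs,
                                    std::vector<std::pair<int, GO>> remoteByPid) {
  // Group remotes by owning PID, then ascending GID within each PID
  // (the device code achieves this with Kokkos::BinSort plus a sort of each equal-PID run).
  std::sort(remoteByPid.begin(), remoteByPid.end());

  std::vector<GO> colGIDs;
  colGIDs.reserve(localDomainGIDs.size() + remoteByPid.size());
  // Local GIDs keep their domain-Map order at the front.
  colGIDs.insert(colGIDs.end(), localDomainGIDs.begin(), localDomainGIDs.end());
  for (const auto& pr : remoteByPid) colGIDs.push_back(pr.second);
  return colGIDs;
}

// Example: local GIDs {0, 1, 2} and remotes {(2, 9), (1, 7), (1, 5)} give column
// GIDs {0, 1, 2, 5, 7, 9}, with the stashed remote PIDs ordered {1, 1, 2}.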
1419 
1420 // Generates a list of owning PIDs based on two transfers (aka Import/Export objects)
1421 // Let:
1422 // OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1423 // MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1424 // MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1425 // VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1426 // Precondition:
1427 // 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
1428 // 2) VectorMap->isSameAs(*owningPIDs.getMap()) - map compatibility between the transfer & the vector
1429 // 3) OwningMap->isOneToOne() - owning map is 1-to-1
1430 // --- Precondition 3 is only checked in DEBUG mode ---
1431 // Postcondition:
1432 // owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
1433 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1434 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1435  bool useReverseModeForOwnership,
1436  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1437  bool useReverseModeForMigration,
1438  Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs) {
1439  typedef Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1440  typedef Tpetra::Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
1441 
1442  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1443  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1444  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1445  Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1446 
1447  TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1448  TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1449 #ifdef HAVE_TPETRA_DEBUG
1450  TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1451 #endif
1452 
1453  int rank = OwningMap->getComm()->getRank();
1454  // Generate the "A" vector (temp) and fill it with owning information. We can read this from transferThatDefinesOwnership without communication.
1455  // Note: Due to the 1-to-1 requirement, several of these options throw
1456  Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> temp(MapAo);
1457  const import_type* ownAsImport = dynamic_cast<const import_type*>(&transferThatDefinesOwnership);
1458  const export_type* ownAsExport = dynamic_cast<const export_type*>(&transferThatDefinesOwnership);
1459 
1460  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1461  Teuchos::ArrayView<int> v_pids = pids();
1462  if (ownAsImport && useReverseModeForOwnership) {
1463  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1464  } else if (ownAsImport && !useReverseModeForOwnership)
1465  getPids(*ownAsImport, v_pids, false);
1466  else if (ownAsExport && useReverseModeForOwnership) {
1467  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");
1468  } else {
1469  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1470  }
1471 
1472  const import_type* xferAsImport = dynamic_cast<const import_type*>(&transferThatDefinesMigration);
1473  const export_type* xferAsExport = dynamic_cast<const export_type*>(&transferThatDefinesMigration);
1474  TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1475 
1476  // Migrate from "A" vector to output vector
1477  owningPIDs.putScalar(rank);
1478  if (xferAsImport && useReverseModeForMigration)
1479  owningPIDs.doExport(temp, *xferAsImport, Tpetra::REPLACE);
1480  else if (xferAsImport && !useReverseModeForMigration)
1481  owningPIDs.doImport(temp, *xferAsImport, Tpetra::REPLACE);
1482  else if (xferAsExport && useReverseModeForMigration)
1483  owningPIDs.doImport(temp, *xferAsExport, Tpetra::REPLACE);
1484  else
1485  owningPIDs.doExport(temp, *xferAsExport, Tpetra::REPLACE);
1486 }
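A minimal calling sketch may help connect the preconditions listed above to concrete objects. Everything below is illustrative only and not part of this header: rowImporter and rebalanceImporter are hypothetical Tpetra::Import objects assumed to satisfy the preconditions (rowImporter's source Map is one-to-one, and its target Map is the same as rebalanceImporter's source Map); both transfers are used in forward mode, which is the only ownership path above that does not throw.

// Hypothetical usage sketch; rowImporter and rebalanceImporter are assumed to exist
// and to satisfy the preconditions stated above.
using map_type = Tpetra::Map<>;
using LO = map_type::local_ordinal_type;
using GO = map_type::global_ordinal_type;
using NT = map_type::node_type;

// Precondition 2: the result vector must live on the migration target Map.
Tpetra::Vector<int, LO, GO, NT> owningPIDs(rebalanceImporter.getTargetMap());

Tpetra::Import_Util::getTwoTransferOwnershipVector(rowImporter, /*useReverseModeForOwnership=*/false,
                                                   rebalanceImporter, /*useReverseModeForMigration=*/false,
                                                   owningPIDs);

// Postcondition: owningPIDs[k] is the rank that owns, in rowImporter's one-to-one source Map,
// the GID stored at local index k of rebalanceImporter's target Map.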
1487 
1488 } // namespace Import_Util
1489 } // namespace Tpetra
1490 
1491 #endif // TPETRA_IMPORT_UTIL2_HPP