Tpetra_Import_Util2.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_IMPORT_UTIL2_HPP
11 #define TPETRA_IMPORT_UTIL2_HPP
12 
17 
18 #include "Tpetra_ConfigDefs.hpp"
19 #include "Tpetra_Import.hpp"
20 #include "Tpetra_HashTable.hpp"
21 #include "Tpetra_Map.hpp"
22 #include "Tpetra_Util.hpp"
23 #include "Tpetra_Distributor.hpp"
26 #include "Tpetra_Vector.hpp"
27 #include "Kokkos_DualView.hpp"
28 #include "KokkosSparse_SortCrs.hpp"
29 #include <Teuchos_Array.hpp>
31 #include <Kokkos_UnorderedMap.hpp>
32 #include <unordered_map>
33 #include <utility>
34 #include <set>
35 
37 
38 #include <Kokkos_Core.hpp>
39 #include <Kokkos_Sort.hpp>
40 
41 namespace Tpetra {
42 namespace Import_Util {
43 
46 template<typename Scalar, typename Ordinal>
47 void
48 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
49  const Teuchos::ArrayView<Ordinal>& CRS_colind,
50  const Teuchos::ArrayView<Scalar>&CRS_vals);
51 
52 template<typename Ordinal>
53 void
54 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
55  const Teuchos::ArrayView<Ordinal>& CRS_colind);
56 
57 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
58 void
59 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
60  const colind_array_type& CRS_colind,
61  const vals_array_type& CRS_vals);
62 
63 template<typename rowptr_array_type, typename colind_array_type>
64 void
65 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
66  const colind_array_type& CRS_colind);
67 
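A minimal usage sketch for the ArrayView overloads declared above (editorial addition, not part of the header; the function name and the literal data are illustrative): sorting a two-row CRS structure in place.

#include "Tpetra_Import_Util2.hpp"
#include <Teuchos_Array.hpp>

void exampleSortCrsEntries ()
{
  // Row 0 holds columns {3, 1}; row 1 holds columns {2, 0, 4}.
  Teuchos::Array<size_t> rowptr (3);
  rowptr[0] = 0; rowptr[1] = 2; rowptr[2] = 5;
  Teuchos::Array<int> colind (5);
  colind[0] = 3; colind[1] = 1; colind[2] = 2; colind[3] = 0; colind[4] = 4;
  Teuchos::Array<double> vals (5, 1.0);

  // Sorts each row's column indices (and the matching values) in place:
  // row 0 becomes {1, 3}, row 1 becomes {0, 2, 4}.
  Tpetra::Import_Util::sortCrsEntries<double, int> (rowptr (), colind (), vals ());
}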
72 template<typename Scalar, typename Ordinal>
73 void
74 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
75  const Teuchos::ArrayView<Ordinal>& CRS_colind,
76  const Teuchos::ArrayView<Scalar>& CRS_vals);
77 
78 template<typename Ordinal>
79 void
80 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
81  const Teuchos::ArrayView<Ordinal>& CRS_colind);
82 
83 template<class rowptr_view_type, class colind_view_type, class vals_view_type>
84 void
 85 sortAndMergeCrsEntries (rowptr_view_type& CRS_rowptr,
 86  colind_view_type& CRS_colind,
 87  vals_view_type& CRS_vals);
88 
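A sketch of the merge behavior of sortAndMergeCrsEntries (editorial addition; names and data are illustrative): duplicate column indices within a row are summed and the row pointers are compacted.

#include "Tpetra_Import_Util2.hpp"
#include <Teuchos_Array.hpp>

void exampleSortAndMergeCrsEntries ()
{
  // One row holding columns {2, 0, 2} with values {1.0, 5.0, 4.0}.
  Teuchos::Array<size_t> rowptr (2);
  rowptr[0] = 0; rowptr[1] = 3;
  Teuchos::Array<int> colind (3);
  colind[0] = 2; colind[1] = 0; colind[2] = 2;
  Teuchos::Array<double> vals (3);
  vals[0] = 1.0; vals[1] = 5.0; vals[2] = 4.0;

  Tpetra::Import_Util::sortAndMergeCrsEntries<double, int> (rowptr (), colind (), vals ());
  // Afterwards only the first rowptr[1] = 2 entries are meaningful:
  // colind = {0, 2}, vals = {5.0, 5.0} (the two column-2 values were summed).
}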
104 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
105 void
106 lowCommunicationMakeColMapAndReindex (
107  const Teuchos::ArrayView<const size_t> &rowptr,
108  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
109  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
110  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
111  const Teuchos::ArrayView<const int> &owningPIDs,
112  Teuchos::Array<int> &remotePIDs,
113  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
114 
119 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
120 void
121 lowCommunicationMakeColMapAndReindex (
122  const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
123  const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
124  const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
125  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
 126  const Kokkos::View<int*,typename Node::device_type> owningPIDs_view,
127  Teuchos::Array<int> &remotePIDs,
128  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
129 
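A hedged sketch of how the ArrayView overload above is typically driven (editorial addition; the wrapper name and template parameter names are illustrative): the caller supplies CRS arrays whose column indices are still global, plus the owning process of every entry, and receives a new column Map while colind_LID is rewritten to local indices into it.

template <class LO, class GO, class NT>
Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
exampleMakeColMap (const Teuchos::ArrayView<const size_t>& rowptr,
                   const Teuchos::ArrayView<LO>& colind_LID,   // output: local column indices
                   const Teuchos::ArrayView<GO>& colind_GID,   // input: global column indices
                   const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domainMap,
                   const Teuchos::ArrayView<const int>& owningPIDs)
{
  Teuchos::Array<int> remotePIDs;   // filled with the owners of the remote columns
  Teuchos::RCP<const Tpetra::Map<LO, GO, NT> > colMap;
  Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO, GO, NT> (
    rowptr, colind_LID, colind_GID, domainMap, owningPIDs, remotePIDs, colMap);
  return colMap;   // colind_LID now indexes into *colMap
}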
143  template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
144  void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
145  bool useReverseModeForOwnership,
146  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
147  bool useReverseModeForMigration,
 148  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> &owningPIDs);
 149 
150 } // namespace Import_Util
151 } // namespace Tpetra
152 
153 
154 //
155 // Implementations
156 //
157 
158 namespace Tpetra {
159 namespace Import_Util {
160 
161 
162 template<typename PID, typename GlobalOrdinal>
163 bool sort_PID_then_GID(const std::pair<PID,GlobalOrdinal> &a,
164  const std::pair<PID,GlobalOrdinal> &b)
165 {
166  if(a.first!=b.first)
167  return (a.first < b.first);
168  return (a.second < b.second);
169 }
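sort_PID_then_GID is a plain strict-weak-ordering comparator, so it can be handed directly to std::sort. A small illustrative sketch (editorial addition):

#include <algorithm>
#include <utility>
#include <vector>

void exampleSortPidGidPairs ()
{
  std::vector<std::pair<int, long long> > pairs;
  pairs.push_back (std::make_pair (2, 7LL));
  pairs.push_back (std::make_pair (0, 9LL));
  pairs.push_back (std::make_pair (2, 3LL));

  std::sort (pairs.begin (), pairs.end (),
             Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
  // Result: (0,9), (2,3), (2,7) -- grouped by PID, then ordered by GID.
}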
170 
171 template<typename PID,
172  typename GlobalOrdinal,
173  typename LocalOrdinal>
174 bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &a,
175  const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &b)
176 {
177  if(a.first!=b.first)
178  return a.first < b.first;
179  else
180  return (a.second.first < b.second.first);
181 }
182 
183 template<typename Scalar,
184  typename LocalOrdinal,
185  typename GlobalOrdinal,
186  typename Node>
187 void
188 reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
189  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type & rowptr,
190  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type & colind,
 191  const Tpetra::Details::Transfer<LocalOrdinal,GlobalOrdinal,Node>& RowTransfer,
 192  Teuchos::RCP<const Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > MyImporter,
193  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MyDomainMap,
194  Teuchos::ArrayRCP<int>& type3PIDs,
195  Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
196  Teuchos::RCP<const Teuchos::Comm<int> >& rcomm)
197 {
198 #ifdef HAVE_TPETRACORE_MPI
199  using Teuchos::TimeMonitor;
200  using ::Tpetra::Details::Behavior;
201  typedef LocalOrdinal LO;
202  typedef GlobalOrdinal GO;
203  typedef std::pair<GO,GO> pidgidpair_t;
204  using Teuchos::RCP;
205  const std::string prefix {" Import_Util2::ReverseND:: "};
206  const std::string label ("IU2::Neighbor");
207 
208  // There can be no neighbor discovery if you don't have an importer
209  if(MyImporter.is_null()) return;
210 
211  std::ostringstream errstr;
212  bool error = false;
213  auto const comm = MyDomainMap->getComm();
214 
215  MPI_Comm rawComm = getRawMpiComm(*comm);
216  const int MyPID = rcomm->getRank ();
217 
218  // Things related to messages I am sending in forward mode (RowTransfer)
219  // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
220  auto ExportPIDs = RowTransfer.getExportPIDs();
221  auto ExportLIDs = RowTransfer.getExportLIDs();
222  auto NumExportLIDs = RowTransfer.getNumExportIDs();
223 
224  Distributor & Distor = MyImporter->getDistributor();
225  const size_t NumRecvs = Distor.getNumReceives();
226  const size_t NumSends = Distor.getNumSends();
227  auto RemoteLIDs = MyImporter->getRemoteLIDs();
228  auto const ProcsFrom = Distor.getProcsFrom();
229  auto const ProcsTo = Distor.getProcsTo();
230 
231  auto LengthsFrom = Distor.getLengthsFrom();
232  auto MyColMap = SourceMatrix.getColMap();
233  const size_t numCols = MyColMap->getLocalNumElements ();
234  RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > target = MyImporter->getTargetMap();
235 
 236  // Get the owning pids in a special way,
 237  // s.t. ProcsFrom[RemotePIDOrder[RemoteLIDs[j]]] is the proc that owns RemoteLIDs[j].
238  Teuchos::Array<int> RemotePIDOrder(numCols,-1);
239 
240  // For each remote ID, record index into ProcsFrom, who owns it.
241  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
242  for (size_t k = 0; k < LengthsFrom[i]; ++k) {
243  const int pid = ProcsFrom[i];
244  if (pid != MyPID) {
245  RemotePIDOrder[RemoteLIDs[j]] = i;
246  }
247  j++;
248  }
249  }
250 
251  // Step One: Start tacking the (GID,PID) pairs on the std sets
252  //
253  // For each index in ProcsFrom, we will insert into a set of (PID,
254  // GID) pairs, in order to build a list of such pairs for each of
255  // those processes. Since this is building a reverse, we will send
256  // to these processes.
257  Teuchos::Array<int> ReverseSendSizes(NumRecvs,0);
258  // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
259  Teuchos::Array< Teuchos::ArrayRCP<pidgidpair_t > > RSB(NumRecvs);
260 
261  {
262 #ifdef HAVE_TPETRA_MMM_TIMINGS
263  TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
264 #endif
265 
266  // 25 Jul 2018: CBL
267  // todo:std::unordered_set (hash table),
268  // with an adequate prereservation ("bucket count").
 269  // An unordered_set has to have a custom hasher for pid/gid pair
270  // However, when pidsets is copied to RSB, it will be in key
271  // order _not_ in pid,gid order. (unlike std::set).
 272  // Implement this with a reserve, and time BOTH building pidsets
273  // _and_ the sort after the receive. Even if unordered_set saves
274  // time, if it causes the sort to be longer, it's not a win.
275 
276  Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
277  {
278 #ifdef HAVE_TPETRA_MMM_TIMINGS
279  TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
280 #endif
281  for(size_t i=0; i < NumExportLIDs; i++) {
282  LO lid = ExportLIDs[i];
283  GO exp_pid = ExportPIDs[i];
284  for(auto j=rowptr[lid]; j<rowptr[lid+1]; j++){
285  int pid_order = RemotePIDOrder[colind[j]];
286  if(pid_order!=-1) {
287  GO gid = MyColMap->getGlobalElement(colind[j]); //Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
288  auto tpair = pidgidpair_t(exp_pid,gid);
289  pidsets[pid_order].insert(pidsets[pid_order].end(),tpair);
290  }
291  }
292  }
293  }
294 
295  {
296 #ifdef HAVE_TPETRA_MMM_TIMINGS
297  TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
298 #endif
299  int jj = 0;
300  for(auto &&ps : pidsets) {
301  auto s = ps.size();
302  RSB[jj] = Teuchos::arcp(new pidgidpair_t[ s ],0, s ,true);
303  std::copy(ps.begin(),ps.end(),RSB[jj]);
304  ReverseSendSizes[jj]=s;
305  ++jj;
306  }
307  }
308  } // end of set based packing.
309 
310  Teuchos::Array<int> ReverseRecvSizes(NumSends,-1);
311  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size()+ProcsTo.size(), MPI_REQUEST_NULL);
312  // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
313  const int mpi_tag_base_ = 3;
314 
315  int mpireq_idx=0;
316  for(int i=0;i<ProcsTo.size();++i) {
317  int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
318  int * thisrecv = (int *) (&ReverseRecvSizes[i]);
319  MPI_Request rawRequest = MPI_REQUEST_NULL;
320  MPI_Irecv(const_cast<int*>(thisrecv),
321  1,
322  MPI_INT,
323  ProcsTo[i],
324  Rec_Tag,
325  rawComm,
326  &rawRequest);
327  rawBreq[mpireq_idx++]=rawRequest;
328  }
329  for(int i=0;i<ProcsFrom.size();++i) {
330  int Send_Tag = mpi_tag_base_ + MyPID;
331  int * mysend = ( int *)(&ReverseSendSizes[i]);
332  MPI_Request rawRequest = MPI_REQUEST_NULL;
333  MPI_Isend(mysend,
334  1,
335  MPI_INT,
336  ProcsFrom[i],
337  Send_Tag,
338  rawComm,
339  &rawRequest);
340  rawBreq[mpireq_idx++]=rawRequest;
341  }
342  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
343 #ifdef HAVE_TPETRA_DEBUG
344  const int err1 =
345 #endif
346  MPI_Waitall (rawBreq.size(), rawBreq.getRawPtr(),
347  rawBstatus.getRawPtr());
348 
349 
350 #ifdef HAVE_TPETRA_DEBUG
351  if(err1) {
352  errstr <<MyPID<< "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
353  error=true;
354  std::cerr<<errstr.str()<<std::flush;
355  }
356 #endif
357 
358  int totalexportpairrecsize = 0;
359  for (size_t i = 0; i < NumSends; ++i) {
360  totalexportpairrecsize += ReverseRecvSizes[i];
361 #ifdef HAVE_TPETRA_DEBUG
362  if(ReverseRecvSizes[i]<0) {
363  errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size "<<ReverseRecvSizes[i]<<std::endl;
364  error=true;
365  }
366 #endif
367  }
368  Teuchos::ArrayRCP<pidgidpair_t >AllReverseRecv= Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize],0,totalexportpairrecsize,true);
369  int offset = 0;
370  mpireq_idx=0;
371  for(int i=0;i<ProcsTo.size();++i) {
372  int recv_data_size = ReverseRecvSizes[i]*2;
373  int recvData_MPI_Tag = mpi_tag_base_*2 + ProcsTo[i];
374  MPI_Request rawRequest = MPI_REQUEST_NULL;
375  GO * rec_bptr = (GO*) (&AllReverseRecv[offset]);
376  offset+=ReverseRecvSizes[i];
377  MPI_Irecv(rec_bptr,
378  recv_data_size,
379  ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
380  ProcsTo[i],
381  recvData_MPI_Tag,
382  rawComm,
383  &rawRequest);
384  rawBreq[mpireq_idx++]=rawRequest;
385  }
386  for(int ii=0;ii<ProcsFrom.size();++ii) {
387  GO * send_bptr = (GO*) (RSB[ii].getRawPtr());
388  MPI_Request rawSequest = MPI_REQUEST_NULL;
389  int send_data_size = ReverseSendSizes[ii]*2; // 2 == count of pair
390  int sendData_MPI_Tag = mpi_tag_base_*2+MyPID;
391  MPI_Isend(send_bptr,
392  send_data_size,
393  ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
394  ProcsFrom[ii],
395  sendData_MPI_Tag,
396  rawComm,
397  &rawSequest);
398 
399  rawBreq[mpireq_idx++]=rawSequest;
400  }
401 #ifdef HAVE_TPETRA_DEBUG
402  const int err =
403 #endif
404  MPI_Waitall (rawBreq.size(),
405  rawBreq.getRawPtr(),
406  rawBstatus.getRawPtr());
407 #ifdef HAVE_TPETRA_DEBUG
408  if(err) {
409  errstr <<MyPID<< "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
410  error=true;
411  std::cerr<<errstr.str()<<std::flush;
412  }
413 #endif
414  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
415 
416  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
417 // don't resize to remove non-unique, just use the end-of-unique iterator
418  if(AllReverseRecv.begin() == newEndOfPairs) return;
419  int ARRsize = std::distance(AllReverseRecv.begin(),newEndOfPairs);
420  auto rPIDs = Teuchos::arcp(new int[ARRsize],0,ARRsize,true);
421  auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize],0,ARRsize,true);
422 
423  int tsize=0;
424  for(auto itr = AllReverseRecv.begin(); itr!=newEndOfPairs; ++itr ) {
425  if((int)(itr->first) != MyPID) {
426  rPIDs[tsize]=(int)itr->first;
427  LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
428  rLIDs[tsize]=lid;
429  tsize++;
430  }
431  }
432 
433  type3PIDs=rPIDs.persistingView(0,tsize);
434  type3LIDs=rLIDs.persistingView(0,tsize);
435 
436  if(error){
437  std::cerr<<errstr.str()<<std::flush;
438  comm->barrier();
439  comm->barrier();
440  comm->barrier();
441  MPI_Abort (MPI_COMM_WORLD, -1);
442  }
443 #endif
444 }
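The size-exchange handshake inside reverseNeighborDiscovery follows the standard nonblocking MPI pattern: post one MPI_Irecv per expected peer, one MPI_Isend per peer being notified, then a single MPI_Waitall over all requests. A stripped-down standalone sketch of that pattern (editorial addition; plain MPI, with a single tag rather than the per-rank tags used above):

#include <mpi.h>
#include <vector>

// Exchange one int with each neighbor: sizesToSend[i] goes to sendRanks[i],
// sizesReceived[i] arrives from recvRanks[i].
void exchangeSizes (MPI_Comm comm, int tag,
                    const std::vector<int>& recvRanks,
                    const std::vector<int>& sendRanks,
                    const std::vector<int>& sizesToSend,
                    std::vector<int>& sizesReceived)
{
  sizesReceived.assign (recvRanks.size (), -1);
  std::vector<MPI_Request> requests (recvRanks.size () + sendRanks.size (),
                                     MPI_REQUEST_NULL);
  size_t r = 0;
  for (size_t i = 0; i < recvRanks.size (); ++i)
    MPI_Irecv (&sizesReceived[i], 1, MPI_INT, recvRanks[i], tag, comm, &requests[r++]);
  for (size_t i = 0; i < sendRanks.size (); ++i)
    MPI_Isend (const_cast<int*> (&sizesToSend[i]), 1, MPI_INT, sendRanks[i], tag, comm, &requests[r++]);
  MPI_Waitall (static_cast<int> (requests.size ()), requests.data (), MPI_STATUSES_IGNORE);
}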
445 
446 // Note: This should get merged with the other Tpetra sort routines eventually.
447 template<typename Scalar, typename Ordinal>
448 void
449 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
450  const Teuchos::ArrayView<Ordinal> & CRS_colind,
451  const Teuchos::ArrayView<Scalar> &CRS_vals)
452 {
453  // For each row, sort column entries from smallest to largest.
 454  // Use shell sort, which is fast if the indices are already nearly sorted.
455  // Code copied from Epetra_CrsMatrix::SortEntries()
456  size_t NumRows = CRS_rowptr.size()-1;
457  size_t nnz = CRS_colind.size();
458 
459  const bool permute_values_array = CRS_vals.size() > 0;
460 
461  for(size_t i = 0; i < NumRows; i++){
462  size_t start=CRS_rowptr[i];
463  if(start >= nnz) continue;
464 
465  size_t NumEntries = CRS_rowptr[i+1] - start;
466  Teuchos::ArrayRCP<Scalar> locValues;
467  if (permute_values_array)
468  locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
469  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
470 
471  Ordinal n = NumEntries;
472  Ordinal m = 1;
473  while (m<n) m = m*3+1;
474  m /= 3;
475 
476  while(m > 0) {
477  Ordinal max = n - m;
478  for(Ordinal j = 0; j < max; j++) {
479  for(Ordinal k = j; k >= 0; k-=m) {
480  if(locIndices[k+m] >= locIndices[k])
481  break;
482  if (permute_values_array) {
483  Scalar dtemp = locValues[k+m];
484  locValues[k+m] = locValues[k];
485  locValues[k] = dtemp;
486  }
487  Ordinal itemp = locIndices[k+m];
488  locIndices[k+m] = locIndices[k];
489  locIndices[k] = itemp;
490  }
491  }
492  m = m/3;
493  }
494  }
495 }
496 
497 template<typename Ordinal>
498 void
499 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
500  const Teuchos::ArrayView<Ordinal> & CRS_colind)
501 {
502  // Generate dummy values array
503  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
504  sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
505 }
506 
507 namespace Impl {
508 
509 template<class RowOffsetsType, class ColumnIndicesType, class ValuesType>
510 class SortCrsEntries {
511 private:
512  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
513  typedef typename ValuesType::non_const_value_type scalar_type;
514 
515 public:
516  SortCrsEntries (const RowOffsetsType& ptr,
517  const ColumnIndicesType& ind,
518  const ValuesType& val) :
519  ptr_ (ptr),
520  ind_ (ind),
521  val_ (val)
522  {
523  static_assert (std::is_signed<ordinal_type>::value, "The type of each "
524  "column index -- that is, the type of each entry of ind "
525  "-- must be signed in order for this functor to work.");
526  }
527 
528  KOKKOS_FUNCTION void operator() (const size_t i) const
529  {
530  const size_t nnz = ind_.extent (0);
531  const size_t start = ptr_(i);
532  const bool permute_values_array = val_.extent(0) > 0;
533 
534  if (start < nnz) {
535  const size_t NumEntries = ptr_(i+1) - start;
536 
537  const ordinal_type n = static_cast<ordinal_type> (NumEntries);
538  ordinal_type m = 1;
539  while (m<n) m = m*3+1;
540  m /= 3;
541 
542  while (m > 0) {
543  ordinal_type max = n - m;
544  for (ordinal_type j = 0; j < max; j++) {
545  for (ordinal_type k = j; k >= 0; k -= m) {
546  const size_t sk = start+k;
547  if (ind_(sk+m) >= ind_(sk)) {
548  break;
549  }
550  if (permute_values_array) {
551  const scalar_type dtemp = val_(sk+m);
552  val_(sk+m) = val_(sk);
553  val_(sk) = dtemp;
554  }
555  const ordinal_type itemp = ind_(sk+m);
556  ind_(sk+m) = ind_(sk);
557  ind_(sk) = itemp;
558  }
559  }
560  m = m/3;
561  }
562  }
563  }
564 
565  static void
566  sortCrsEntries (const RowOffsetsType& ptr,
567  const ColumnIndicesType& ind,
568  const ValuesType& val)
569  {
570  // For each row, sort column entries from smallest to largest.
 571  // Use shell sort, which is fast if the indices are already nearly sorted.
572  // Code copied from Epetra_CrsMatrix::SortEntries()
573  // NOTE: This should not be taken as a particularly efficient way to sort
574  // rows of matrices in parallel. But it is correct, so that's something.
575  if (ptr.extent (0) == 0) {
576  return; // no rows, so nothing to sort
577  }
578  const size_t NumRows = ptr.extent (0) - 1;
579 
580  typedef size_t index_type; // what this function was using; not my choice
581  typedef typename ValuesType::execution_space execution_space;
582  // Specify RangePolicy explicitly, in order to use correct execution
583  // space. See #1345.
584  typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
585  Kokkos::parallel_for ("sortCrsEntries", range_type (0, NumRows),
586  SortCrsEntries (ptr, ind, val));
587  }
588 
589 private:
590  RowOffsetsType ptr_;
591  ColumnIndicesType ind_;
592  ValuesType val_;
593 };
594 
595 } // namespace Impl
596 
597 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
598 void
599 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
600  const colind_array_type& CRS_colind,
601  const vals_array_type& CRS_vals)
602 {
603  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
604  vals_array_type>::sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
605 }
606 
607 template<typename rowptr_array_type, typename colind_array_type>
608 void
609 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
610  const colind_array_type& CRS_colind)
611 {
612  // Generate dummy values array
613  typedef typename colind_array_type::execution_space execution_space;
614  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
615  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
616  scalar_view_type CRS_vals;
617  sortCrsEntries<rowptr_array_type, colind_array_type,
618  scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
619 }
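The Kokkos::View overloads accept any rank-1 Views laid out as CRS arrays and sort on the Views' execution space. A hedged sketch (editorial addition; assumes Kokkos has already been initialized, e.g. via Tpetra::ScopeGuard, and that the view contents are filled elsewhere):

#include <Kokkos_Core.hpp>

void exampleSortCrsEntriesKokkos ()
{
  using device_type = Kokkos::DefaultExecutionSpace::device_type;
  Kokkos::View<size_t*, device_type> rowptr ("rowptr", 3);
  Kokkos::View<int*, device_type>    colind ("colind", 5);
  Kokkos::View<double*, device_type> vals   ("vals",   5);
  // ... fill rowptr / colind / vals, e.g. via host mirrors and Kokkos::deep_copy ...

  // Sorts each row's column indices (and values) in place on the Views' device.
  Tpetra::Import_Util::sortCrsEntries (rowptr, colind, vals);
}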
620 
621 // Note: This should get merged with the other Tpetra sort routines eventually.
622 template<typename Scalar, typename Ordinal>
623 void
624 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
625  const Teuchos::ArrayView<Ordinal> & CRS_colind,
626  const Teuchos::ArrayView<Scalar> &CRS_vals)
627 {
628  // For each row, sort column entries from smallest to largest,
 629  // merging column ids that are identical by adding their values. Use shell
 630  // sort, which is fast if the indices are already nearly sorted.
631  // Code copied from Epetra_CrsMatrix::SortEntries()
632 
633  if (CRS_rowptr.size () == 0) {
634  return; // no rows, so nothing to sort
635  }
636  const size_t NumRows = CRS_rowptr.size () - 1;
637  const size_t nnz = CRS_colind.size ();
638  size_t new_curr = CRS_rowptr[0];
639  size_t old_curr = CRS_rowptr[0];
640 
641  const bool permute_values_array = CRS_vals.size() > 0;
642 
643  for(size_t i = 0; i < NumRows; i++){
644  const size_t old_rowptr_i=CRS_rowptr[i];
645  CRS_rowptr[i] = old_curr;
646  if(old_rowptr_i >= nnz) continue;
647 
648  size_t NumEntries = CRS_rowptr[i+1] - old_rowptr_i;
649  Teuchos::ArrayRCP<Scalar> locValues;
650  if (permute_values_array)
651  locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
652  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
653 
654  // Sort phase
655  Ordinal n = NumEntries;
656  Ordinal m = n/2;
657 
658  while(m > 0) {
659  Ordinal max = n - m;
660  for(Ordinal j = 0; j < max; j++) {
661  for(Ordinal k = j; k >= 0; k-=m) {
662  if(locIndices[k+m] >= locIndices[k])
663  break;
664  if (permute_values_array) {
665  Scalar dtemp = locValues[k+m];
666  locValues[k+m] = locValues[k];
667  locValues[k] = dtemp;
668  }
669  Ordinal itemp = locIndices[k+m];
670  locIndices[k+m] = locIndices[k];
671  locIndices[k] = itemp;
672  }
673  }
674  m = m/2;
675  }
676 
677  // Merge & shrink
678  for(size_t j=old_rowptr_i; j < CRS_rowptr[i+1]; j++) {
679  if(j > old_rowptr_i && CRS_colind[j]==CRS_colind[new_curr-1]) {
680  if (permute_values_array) CRS_vals[new_curr-1] += CRS_vals[j];
681  }
682  else if(new_curr==j) {
683  new_curr++;
684  }
685  else {
686  CRS_colind[new_curr] = CRS_colind[j];
687  if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
688  new_curr++;
689  }
690  }
691  old_curr=new_curr;
692  }
693 
694  CRS_rowptr[NumRows] = new_curr;
695 }
696 
697 template<typename Ordinal>
698 void
699 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
700  const Teuchos::ArrayView<Ordinal> & CRS_colind)
701 {
702  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
703  return sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
704 }
705 
706 template<class rowptr_view_type, class colind_view_type, class vals_view_type>
707 void
708 sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
709  colind_view_type& CRS_colind,
710  vals_view_type& CRS_vals)
711 {
712  using execution_space = typename vals_view_type::execution_space;
713 
714  auto CRS_rowptr_in = CRS_rowptr;
715  auto CRS_colind_in = CRS_colind;
716  auto CRS_vals_in = CRS_vals;
717 
718  KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
719  colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
720  CRS_rowptr, CRS_colind, CRS_vals);
721 }
722 
723 
724 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
725 void
726 lowCommunicationMakeColMapAndReindexSerial (const Teuchos::ArrayView<const size_t> &rowptr,
727  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
728  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
729  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
730  const Teuchos::ArrayView<const int> &owningPIDs,
731  Teuchos::Array<int> &remotePIDs,
732  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
733 {
734  using Teuchos::rcp;
735  typedef LocalOrdinal LO;
736  typedef GlobalOrdinal GO;
737  typedef Tpetra::global_size_t GST;
738  typedef Tpetra::Map<LO, GO, Node> map_type;
739  const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
740 
741  // The domainMap is an RCP because there is a shortcut for a
742  // (common) special case to return the columnMap = domainMap.
743  const map_type& domainMap = *domainMapRCP;
744 
745  // Scan all column indices and sort into two groups:
746  // Local: those whose GID matches a GID of the domain map on this processor and
747  // Remote: All others.
748  const size_t numDomainElements = domainMap.getLocalNumElements ();
749  Teuchos::Array<bool> LocalGIDs;
750  if (numDomainElements > 0) {
751  LocalGIDs.resize (numDomainElements, false); // Assume domain GIDs are not local
752  }
753 
 754  // In principle it is good to have RemoteGIDs and RemoteGIDList be as
755  // long as the number of remote GIDs on this processor, but this
756  // would require two passes through the column IDs, so we make it
757  // the max of 100 and the number of block rows.
758  //
759  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
760  // most INT_MAX entries, but it's possible to have more rows than
761  // that (if size_t is 64 bits and int is 32 bits).
762  const size_t numMyRows = rowptr.size () - 1;
763  const int hashsize = std::max (static_cast<int> (numMyRows), 100);
764 
765  Tpetra::Details::HashTable<GO, LO> RemoteGIDs (hashsize);
766  Teuchos::Array<GO> RemoteGIDList;
767  RemoteGIDList.reserve (hashsize);
768  Teuchos::Array<int> PIDList;
769  PIDList.reserve (hashsize);
770 
771  // Here we start using the *LocalOrdinal* colind_LID array. This is
772  // safe even if both columnIndices arrays are actually the same
 773  // (because LocalOrdinal==GO). For *local* GIDs, set
 774  // colind_LID with their LID in the domainMap. For *remote*
775  // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
776  // before the increment of the remote count. These numberings will
777  // be separate because no local LID is greater than
778  // numDomainElements.
779 
780  size_t NumLocalColGIDs = 0;
781  LO NumRemoteColGIDs = 0;
782  for (size_t i = 0; i < numMyRows; ++i) {
783  for(size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
784  const GO GID = colind_GID[j];
785  // Check if GID matches a row GID
786  const LO LID = domainMap.getLocalElement (GID);
787  if(LID != -1) {
788  const bool alreadyFound = LocalGIDs[LID];
789  if (! alreadyFound) {
790  LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
791  NumLocalColGIDs++;
792  }
793  colind_LID[j] = LID;
794  }
795  else {
796  const LO hash_value = RemoteGIDs.get (GID);
797  if (hash_value == -1) { // This means its a new remote GID
798  const int PID = owningPIDs[j];
799  TEUCHOS_TEST_FOR_EXCEPTION(
800  PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
801  "PID is owned.");
802  colind_LID[j] = static_cast<LO> (numDomainElements + NumRemoteColGIDs);
803  RemoteGIDs.add (GID, NumRemoteColGIDs);
804  RemoteGIDList.push_back (GID);
805  PIDList.push_back (PID);
806  NumRemoteColGIDs++;
807  }
808  else {
809  colind_LID[j] = static_cast<LO> (numDomainElements + hash_value);
810  }
811  }
812  }
813  }
814 
815  // Possible short-circuit: If all domain map GIDs are present as
816  // column indices, then set ColMap=domainMap and quit.
817  if (domainMap.getComm ()->getSize () == 1) {
818  // Sanity check: When there is only one process, there can be no
819  // remoteGIDs.
820  TEUCHOS_TEST_FOR_EXCEPTION(
821  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
822  "process in the domain Map's communicator, which means that there are no "
823  "\"remote\" indices. Nevertheless, some column indices are not in the "
824  "domain Map.");
825  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
826  // In this case, we just use the domainMap's indices, which is,
 827  // not coincidentally, what we clobbered colind with up above
828  // anyway. No further reindexing is needed.
829  colMap = domainMapRCP;
830  return;
831  }
832  }
833 
834  // Now build the array containing column GIDs
835  // Build back end, containing remote GIDs, first
836  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
837  Teuchos::Array<GO> ColIndices;
838  GO* RemoteColIndices = NULL;
839  if (numMyCols > 0) {
840  ColIndices.resize (numMyCols);
841  if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
842  RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
843  }
844  }
845 
846  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
847  RemoteColIndices[i] = RemoteGIDList[i];
848  }
849 
850  // Build permute array for *remote* reindexing.
851  Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
852  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
853  RemotePermuteIDs[i]=i;
854  }
855 
856  // Sort External column indices so that all columns coming from a
857  // given remote processor are contiguous. This is a sort with two
 858  // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
859  Tpetra::sort3 (PIDList.begin (), PIDList.end (),
860  ColIndices.begin () + NumLocalColGIDs,
861  RemotePermuteIDs.begin ());
862 
863  // Stash the RemotePIDs.
864  //
865  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
866  // we'd call it here.
867  remotePIDs = PIDList;
868 
869  // Sort external column indices so that columns from a given remote
870  // processor are not only contiguous but also in ascending
871  // order. NOTE: I don't know if the number of externals associated
872  // with a given remote processor is known at this point ... so I
873  // count them here.
874 
875  // NTS: Only sort the RemoteColIndices this time...
876  LO StartCurrent = 0, StartNext = 1;
877  while (StartNext < NumRemoteColGIDs) {
878  if (PIDList[StartNext]==PIDList[StartNext-1]) {
879  StartNext++;
880  }
881  else {
882  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
883  ColIndices.begin () + NumLocalColGIDs + StartNext,
884  RemotePermuteIDs.begin () + StartCurrent);
885  StartCurrent = StartNext;
886  StartNext++;
887  }
888  }
889  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
890  ColIndices.begin () + NumLocalColGIDs + StartNext,
891  RemotePermuteIDs.begin () + StartCurrent);
892 
893  // Reverse the permutation to get the information we actually care about
894  Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
895  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
896  ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
897  }
898 
899  // Build permute array for *local* reindexing.
900  bool use_local_permute = false;
901  Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
902 
903  // Now fill front end. Two cases:
904  //
905  // (1) If the number of Local column GIDs is the same as the number
906  // of Local domain GIDs, we can simply read the domain GIDs into
907  // the front part of ColIndices, otherwise
908  //
909  // (2) We step through the GIDs of the domainMap, checking to see if
 910  // each domain GID is a column GID. We want to do this to
911  // maintain a consistent ordering of GIDs between the columns
912  // and the domain.
913  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
914  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
915  if (NumLocalColGIDs > 0) {
916  // Load Global Indices into first numMyCols elements column GID list
917  std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
918  ColIndices.begin ());
919  }
920  }
921  else {
922  LO NumLocalAgain = 0;
923  use_local_permute = true;
924  for (size_t i = 0; i < numDomainElements; ++i) {
925  if (LocalGIDs[i]) {
926  LocalPermuteIDs[i] = NumLocalAgain;
927  ColIndices[NumLocalAgain++] = domainGlobalElements[i];
928  }
929  }
930  TEUCHOS_TEST_FOR_EXCEPTION(
931  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
932  std::runtime_error, prefix << "Local ID count test failed.");
933  }
934 
935  // Make column Map
936  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
937  colMap = rcp (new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
938  domainMap.getComm ()));
939 
940  // Low-cost reindex of the matrix
941  for (size_t i = 0; i < numMyRows; ++i) {
942  for (size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
943  const LO ID = colind_LID[j];
944  if (static_cast<size_t> (ID) < numDomainElements) {
945  if (use_local_permute) {
946  colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
947  }
948  // In the case where use_local_permute==false, we just copy
949  // the DomainMap's ordering, which it so happens is what we
950  // put in colind_LID to begin with.
951  }
952  else {
953  colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
954  }
955  }
956  }
957 }
958 
959 
960 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
961 void
962 lowCommunicationMakeColMapAndReindex (
963  const Teuchos::ArrayView<const size_t> &rowptr,
964  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
965  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
966  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
967  const Teuchos::ArrayView<const int> &owningPIDs,
968  Teuchos::Array<int> &remotePIDs,
969  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
970 {
971  using Teuchos::rcp;
972  typedef LocalOrdinal LO;
973  typedef GlobalOrdinal GO;
974  typedef Tpetra::global_size_t GST;
975  typedef Tpetra::Map<LO, GO, Node> map_type;
976  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
977 
978  typedef typename Node::device_type DT;
979  using execution_space = typename DT::execution_space;
980  execution_space exec;
981  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
982  typedef typename map_type::local_map_type local_map_type;
983 
984  // Create device mirror and host mirror views from function parameters
985  // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
986  auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
987  auto rowptr_view = Details::create_mirror_view_from_raw_host_array(exec, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
988  auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
989  auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(exec, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
990 
991  typename decltype(colind_LID_view)::HostMirror colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
992  typename decltype(colind_GID_view)::HostMirror colind_GID_host(colind_GID.getRawPtr(), colind_GID.size());
993 
994  Kokkos::deep_copy(colind_LID_view, colind_LID_host);
995  Kokkos::deep_copy(colind_GID_view, colind_GID_host);
996 
997  // The domainMap is an RCP because there is a shortcut for a
998  // (common) special case to return the columnMap = domainMap.
999  const map_type& domainMap = *domainMapRCP;
1000 
1001  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID.size());
1002  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID.size());
1003 
1004  const size_t numMyRows = rowptr.size () - 1;
1005  local_map_type domainMap_local = domainMap.getLocalMap();
1006 
1007  const size_t numDomainElements = domainMap.getLocalNumElements ();
1008  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1009  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1010 
1011  size_t NumLocalColGIDs = 0;
1012 
1013  // Scan all column indices and sort into two groups:
1014  // Local: those whose GID matches a GID of the domain map on this processor and
1015  // Remote: All others.
 1016  // Kokkos::parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1017  Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1018  const int i = member.league_rank();
1019  size_t NumLocalColGIDs_temp = 0;
1020  size_t rowptr_start = rowptr_view[i];
1021  size_t rowptr_end = rowptr_view[i+1];
1022  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1023  const GO GID = colind_GID_view[j];
1024  // Check if GID matches a row GID in local domain map
1025  const LO LID = domainMap_local.getLocalElement (GID);
1026  if (LID != -1) {
1027  auto outcome = LocalGIDs_view_map.insert(LID);
1028  // Fresh insert
1029  if(outcome.success()) {
1030  LocalGIDs_view[LID] = true;
1031  innerUpdate++;
1032  }
1033  }
1034  else {
1035  const int PID = owningPIDs_view[j];
1036  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1037  if(outcome.success() && PID == -1) {
1038  Kokkos::abort("Cannot figure out if ID is owned.\n");
1039  }
1040  }
1041  }, NumLocalColGIDs_temp);
1042  if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1043  }, NumLocalColGIDs);
1044 
1045  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1046 
1047  Kokkos::View<int*, DT> PIDList_view("PIDList", NumRemoteColGIDs);
1048  auto PIDList_host = Kokkos::create_mirror_view(PIDList_view);
1049 
1050  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1051  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1052 
1053  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1054  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1055  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1056  if(final && RemoteGIDs_view_map.valid_at(i)) {
1057  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1058  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1059  }
1060  if(RemoteGIDs_view_map.valid_at(i)) {
1061  update += 1;
1062  }
1063  });
1064 
1065  // Possible short-circuit: If all domain map GIDs are present as
1066  // column indices, then set ColMap=domainMap and quit.
1067  if (domainMap.getComm ()->getSize () == 1) {
1068  // Sanity check: When there is only one process, there can be no
1069  // remoteGIDs.
1070  TEUCHOS_TEST_FOR_EXCEPTION(
1071  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1072  "process in the domain Map's communicator, which means that there are no "
1073  "\"remote\" indices. Nevertheless, some column indices are not in the "
1074  "domain Map.");
1075  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1076  // In this case, we just use the domainMap's indices, which is,
 1077  // not coincidentally, what we clobbered colind with up above
1078  // anyway. No further reindexing is needed.
1079  colMap = domainMapRCP;
1080 
1081  // Fill out local colMap (which should only contain local GIDs)
1082  auto localColMap = colMap->getLocalMap();
1083  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1084  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1085  });
1086  Kokkos::deep_copy(execution_space(), colind_LID_host, colind_LID_view);
1087  return;
1088  }
1089  }
1090 
1091  // Now build the array containing column GIDs
1092  // Build back end, containing remote GIDs, first
1093  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1094  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1095 
1096  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1097  if(NumRemoteColGIDs > 0) {
1098  if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1099  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1100  ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1101  });
1102  }
1103 
1104  // Find the largest PID for bin sorting purposes
1105  int PID_max = 0;
1106  Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_host.size()), KOKKOS_LAMBDA(const int i, int& max) {
1107  if(max < PIDList_view[i]) max = PIDList_view[i];
1108  }, Kokkos::Max<int>(PID_max));
1109 
1110  using KeyViewTypePID = decltype(PIDList_view);
1111  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1112 
1113  // Make a subview of ColIndices for remote GID sorting
1114  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1115 
1116  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1117  BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1118 
1119  // Sort External column indices so that all columns coming from a
1120  // given remote processor are contiguous. This is a sort with one
 1121  // auxiliary array: RemoteColIndices
1122  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1123  bin_sort2.create_permute_vector(exec);
1124  bin_sort2.sort(exec, PIDList_view);
1125  bin_sort2.sort(exec, ColIndices_subview);
1126 
1127  // Deep copy back from device to host
1128  Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1129 
1130  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1131  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
1132  // we'd call it here.
1133 
1134  exec.fence("fence before setting PIDList");
1135  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1136  for(LO i = 0; i < NumRemoteColGIDs; ++i) {
1137  PIDList[i] = PIDList_host[i];
1138  }
1139 
1140  remotePIDs = PIDList;
1141 
1142  // Sort external column indices so that columns from a given remote
1143  // processor are not only contiguous but also in ascending
1144  // order. NOTE: I don't know if the number of externals associated
1145  // with a given remote processor is known at this point ... so I
1146  // count them here.
1147  LO StartCurrent = 0, StartNext = 1;
1148  while(StartNext < NumRemoteColGIDs) {
1149  if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1150  StartNext++;
1151  }
1152  else {
1153  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1154  StartCurrent = StartNext;
1155  StartNext++;
1156  }
1157  }
1158 
1159  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1160  }
1161 
1162 
1163  // Build permute array for *local* reindexing.
1164 
1165  // Now fill front end. Two cases:
1166  //
1167  // (1) If the number of Local column GIDs is the same as the number
1168  // of Local domain GIDs, we can simply read the domain GIDs into
1169  // the front part of ColIndices, otherwise
1170  //
1171  // (2) We step through the GIDs of the domainMap, checking to see if
 1172  // each domain GID is a column GID. We want to do this to
1173  // maintain a consistent ordering of GIDs between the columns
1174  // and the domain.
1175  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1176  if (NumLocalColGIDs > 0) {
1177  // Load Global Indices into first numMyCols elements column GID list
1178  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1179  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1180  });
1181  }
1182  }
1183  else {
1184  // This part isn't actually tested in the unit tests
1185  LO NumLocalAgain = 0;
1186  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1187  if(final && LocalGIDs_view[i]) {
1188  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1189  }
1190  if(LocalGIDs_view[i]) {
1191  update++;
1192  }
1193  }, NumLocalAgain);
1194 
1195 
1196  TEUCHOS_TEST_FOR_EXCEPTION(
1197  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1198  std::runtime_error, prefix << "Local ID count test failed.");
1199  }
1200 
1201  // Make column Map
1202  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1203 
1204  colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1205  domainMap.getComm ()));
1206 
1207  // Fill out colind_LID using local map
1208  auto localColMap = colMap->getLocalMap();
1209  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1210  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1211  });
1212 
 1213  // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
1214  // When colind_LID becomes a Kokkos View we can delete this
1215  Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
1216 }
1217 
1218 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1219 void
1220 lowCommunicationMakeColMapAndReindex (
1221  const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
1222  const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
1223  const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
1224  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
1225  const Kokkos::View<int*,typename Node::device_type> owningPIDs_view,
1226  Teuchos::Array<int> &remotePIDs,
1227  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
1228 {
1229  using Teuchos::rcp;
1230  typedef LocalOrdinal LO;
1231  typedef GlobalOrdinal GO;
1232  typedef Tpetra::global_size_t GST;
1233  typedef Tpetra::Map<LO, GO, Node> map_type;
1234  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1235 
1236  typedef typename Node::device_type DT;
1237  using execution_space = typename DT::execution_space;
1238  execution_space exec;
1239  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1240  typedef typename map_type::local_map_type local_map_type;
1241 
1242  // The domainMap is an RCP because there is a shortcut for a
1243  // (common) special case to return the columnMap = domainMap.
1244  const map_type& domainMap = *domainMapRCP;
1245 
1246  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID_view.size());
1247  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID_view.size());
1248 
1249  const size_t numMyRows = rowptr_view.size () - 1;
1250  local_map_type domainMap_local = domainMap.getLocalMap();
1251 
1252  const size_t numDomainElements = domainMap.getLocalNumElements ();
1253  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1254  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1255 
1256  size_t NumLocalColGIDs = 0;
1257 
1258  // Scan all column indices and sort into two groups:
1259  // Local: those whose GID matches a GID of the domain map on this processor and
1260  // Remote: All others.
 1261  // Kokkos::parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1262  Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1263  const int i = member.league_rank();
1264  size_t NumLocalColGIDs_temp = 0;
1265  size_t rowptr_start = rowptr_view[i];
1266  size_t rowptr_end = rowptr_view[i+1];
1267  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1268  const GO GID = colind_GID_view[j];
1269  // Check if GID matches a row GID in local domain map
1270  const LO LID = domainMap_local.getLocalElement (GID);
1271  if (LID != -1) {
1272  auto outcome = LocalGIDs_view_map.insert(LID);
1273  // Fresh insert
1274  if(outcome.success()) {
1275  LocalGIDs_view[LID] = true;
1276  innerUpdate++;
1277  }
1278  }
1279  else {
1280  const int PID = owningPIDs_view[j];
1281  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1282  if(outcome.success() && PID == -1) {
1283  Kokkos::abort("Cannot figure out if ID is owned.\n");
1284  }
1285  }
1286  }, NumLocalColGIDs_temp);
1287  if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1288  }, NumLocalColGIDs);
1289 
1290  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1291 
1292  Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);
1293 
1294  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1295  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1296 
1297  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1298  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1299  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1300  if(final && RemoteGIDs_view_map.valid_at(i)) {
1301  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1302  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1303  }
1304  if(RemoteGIDs_view_map.valid_at(i)) {
1305  update += 1;
1306  }
1307  });
1308 
1309  // Possible short-circuit: If all domain map GIDs are present as
1310  // column indices, then set ColMap=domainMap and quit.
1311  if (domainMap.getComm ()->getSize () == 1) {
1312  // Sanity check: When there is only one process, there can be no
1313  // remoteGIDs.
1314  TEUCHOS_TEST_FOR_EXCEPTION(
1315  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1316  "process in the domain Map's communicator, which means that there are no "
1317  "\"remote\" indices. Nevertheless, some column indices are not in the "
1318  "domain Map.");
1319  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1320  // In this case, we just use the domainMap's indices, which is,
 1321  // not coincidentally, what we clobbered colind with up above
1322  // anyway. No further reindexing is needed.
1323  colMap = domainMapRCP;
1324 
1325  // Fill out local colMap (which should only contain local GIDs)
1326  auto localColMap = colMap->getLocalMap();
1327  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1328  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1329  });
1330  return;
1331  }
1332  }
1333 
1334  // Now build the array containing column GIDs
1335  // Build back end, containing remote GIDs, first
1336  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1337  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1338 
1339  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1340  if(NumRemoteColGIDs > 0) {
1341  if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1342  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1343  ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1344  });
1345  }
1346 
1347  // Find the largest PID for bin sorting purposes
1348  int PID_max = 0;
1349  Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
1350  if(max < PIDList_view[i]) max = PIDList_view[i];
1351  }, Kokkos::Max<int>(PID_max));
1352 
1353  using KeyViewTypePID = decltype(PIDList_view);
1354  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1355 
1356  // Make a subview of ColIndices for remote GID sorting
1357  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1358 
1359  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1360  BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1361 
1362  // Sort External column indices so that all columns coming from a
1363  // given remote processor are contiguous. This is a sort with one
 1364  // auxiliary array: RemoteColIndices
1365  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1366  bin_sort2.create_permute_vector(exec);
1367  bin_sort2.sort(exec, PIDList_view);
1368  bin_sort2.sort(exec, ColIndices_subview);
1369 
1370  // Deep copy back from device to host
1371  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1372  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1373  Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
1374  Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1375  exec.fence();
1376 
1377  remotePIDs = PIDList;
1378 
1379  // Sort external column indices so that columns from a given remote
1380  // processor are not only contiguous but also in ascending
1381  // order. NOTE: I don't know if the number of externals associated
1382  // with a given remote processor is known at this point ... so I
1383  // count them here.
1384  LO StartCurrent = 0, StartNext = 1;
1385  while(StartNext < NumRemoteColGIDs) {
1386  if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1387  StartNext++;
1388  }
1389  else {
1390  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1391  StartCurrent = StartNext;
1392  StartNext++;
1393  }
1394  }
1395 
1396  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1397  }
1398 
1399 
1400  // Build permute array for *local* reindexing.
1401 
1402  // Now fill front end. Two cases:
1403  //
1404  // (1) If the number of Local column GIDs is the same as the number
1405  // of Local domain GIDs, we can simply read the domain GIDs into
1406  // the front part of ColIndices, otherwise
1407  //
1408  // (2) We step through the GIDs of the domainMap, checking to see if
 1409  // each domain GID is a column GID. We want to do this to
1410  // maintain a consistent ordering of GIDs between the columns
1411  // and the domain.
1412  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1413  if (NumLocalColGIDs > 0) {
1414  // Load Global Indices into first numMyCols elements column GID list
1415  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1416  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1417  });
1418  }
1419  }
1420  else {
1421  // This part isn't actually tested in the unit tests
1422  LO NumLocalAgain = 0;
1423  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1424  if(final && LocalGIDs_view[i]) {
1425  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1426  }
1427  if(LocalGIDs_view[i]) {
1428  update++;
1429  }
1430  }, NumLocalAgain);
1431 
1432 
1433  TEUCHOS_TEST_FOR_EXCEPTION(
1434  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1435  std::runtime_error, prefix << "Local ID count test failed.");
1436  }
1437 
1438  // Make column Map
1439  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1440 
1441  colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1442  domainMap.getComm ()));
1443 
1444  // Fill out colind_LID using local map
1445  auto localColMap = colMap->getLocalMap();
1446  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1447  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1448  });
1449 }
1450 
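A matching sketch for the all-Kokkos::View overload implemented above (editorial addition; the wrapper name and parameter names are illustrative, and the Views are assumed to already hold device data consistent with domainMap):

template <class LO, class GO, class NT>
Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
exampleMakeColMapDevice (
  const Kokkos::View<size_t*, typename NT::device_type>& rowptr,
  const Kokkos::View<LO*, typename NT::device_type>& colind_LID,   // rewritten on device
  const Kokkos::View<GO*, typename NT::device_type>& colind_GID,
  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domainMap,
  const Kokkos::View<int*, typename NT::device_type>& owningPIDs)
{
  Teuchos::Array<int> remotePIDs;
  Teuchos::RCP<const Tpetra::Map<LO, GO, NT> > colMap;
  Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO, GO, NT> (
    rowptr, colind_LID, colind_GID, domainMap, owningPIDs, remotePIDs, colMap);
  return colMap;
}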
 1451 // Generates a list of owning PIDs based on two transfers (aka import/export objects)
1452 // Let:
1453 // OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1454 // MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1455 // MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1456 // VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1457 // Precondition:
1458 // 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
1459 // 2) VectorMap->isSameAs(*owningPIDs.getMap()) - map compatibility between transfer & vector
1460 // 3) OwningMap->isOneToOne() - owning map is 1-to-1
1461 // --- Precondition 3 is only checked in DEBUG mode ---
1462 // Postcondition:
1463 // owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
1464 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1465 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1466  bool useReverseModeForOwnership,
1467  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1468  bool useReverseModeForMigration,
1469  Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs) {
1470  typedef Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1471  typedef Tpetra::Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
1472 
1473  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > OwningMap = useReverseModeForOwnership ?
1474  transferThatDefinesOwnership.getTargetMap() :
1475  transferThatDefinesOwnership.getSourceMap();
1476  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAo = useReverseModeForOwnership ?
1477  transferThatDefinesOwnership.getSourceMap() :
1478  transferThatDefinesOwnership.getTargetMap();
1479  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAm = useReverseModeForMigration ?
1480  transferThatDefinesMigration.getTargetMap() :
1481  transferThatDefinesMigration.getSourceMap();
1482  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > VectorMap = useReverseModeForMigration ?
1483  transferThatDefinesMigration.getSourceMap() :
1484  transferThatDefinesMigration.getTargetMap();
1485 
1486  TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1487  TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1488 #ifdef HAVE_TPETRA_DEBUG
1489  TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1490 #endif
1491 
1492  int rank = OwningMap->getComm()->getRank();
1493  // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1494  // Note: Due to the 1-to-1 requirement, several of these options throw
1495  Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> temp(MapAo, false);
1496  const import_type* ownAsImport = dynamic_cast<const import_type*> (&transferThatDefinesOwnership);
1497  const export_type* ownAsExport = dynamic_cast<const export_type*> (&transferThatDefinesOwnership);
1498 
1499  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1500  Teuchos::ArrayView<int> v_pids = pids();
1501  if(ownAsImport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1502  else if(ownAsImport && !useReverseModeForOwnership) getPids(*ownAsImport,v_pids,false);
1503  else if(ownAsExport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");}
1504  else {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1505 
1506  const import_type* xferAsImport = dynamic_cast<const import_type*> (&transferThatDefinesMigration);
1507  const export_type* xferAsExport = dynamic_cast<const export_type*> (&transferThatDefinesMigration);
1508  TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1509 
1510  // Migrate from "A" vector to output vector
1511  owningPIDs.putScalar(rank);
1512  if(xferAsImport && useReverseModeForMigration) owningPIDs.doExport(temp,*xferAsImport,Tpetra::REPLACE);
1513  else if(xferAsImport && !useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsImport,Tpetra::REPLACE);
1514  else if(xferAsExport && useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsExport,Tpetra::REPLACE);
1515  else owningPIDs.doExport(temp,*xferAsExport,Tpetra::REPLACE);
1516 
1517 }
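// Editorial usage sketch (hypothetical names, not part of this header): with a
// forward-mode Import "ownershipImport" whose source Map is one-to-one, and a
// forward-mode Import "migrationImport" whose source Map matches
// ownershipImport's target Map, one might call
//
//   Tpetra::Vector<int, LO, GO, NT> owningPIDs(migrationImport->getTargetMap());
//   Tpetra::Import_Util::getTwoTransferOwnershipVector(*ownershipImport, false,
//                                                      *migrationImport, false,
//                                                      owningPIDs);
//
// After the call, owningPIDs[VectorMap->getLocalElement(g)] holds the rank that
// owns GID g in ownershipImport's source Map, as stated in the postcondition
// above.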
1518 
1519 
1520 
1521 } // namespace Import_Util
1522 } // namespace Tpetra
1523 
1524 #endif // TPETRA_IMPORT_UTIL2_HPP