Tpetra_Import_Util2.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_IMPORT_UTIL2_HPP
43 #define TPETRA_IMPORT_UTIL2_HPP
44 
49 
50 #include "Tpetra_ConfigDefs.hpp"
51 #include "Tpetra_Import.hpp"
52 #include "Tpetra_HashTable.hpp"
53 #include "Tpetra_Map.hpp"
54 #include "Tpetra_Util.hpp"
55 #include "Tpetra_Distributor.hpp"
58 #include "Tpetra_Vector.hpp"
59 #include "Kokkos_DualView.hpp"
60 #include "KokkosSparse_SortCrs.hpp"
61 #include <Teuchos_Array.hpp>
63 #include <Kokkos_UnorderedMap.hpp>
64 #include <unordered_map>
65 #include <utility>
66 #include <set>
67 
69 
70 #include <Kokkos_Core.hpp>
71 #include <Kokkos_Sort.hpp>
72 
73 namespace Tpetra {
74 namespace Import_Util {
75 
78 template<typename Scalar, typename Ordinal>
79 void
80 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
81  const Teuchos::ArrayView<Ordinal>& CRS_colind,
82  const Teuchos::ArrayView<Scalar>& CRS_vals);
83 
84 template<typename Ordinal>
85 void
86 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
87  const Teuchos::ArrayView<Ordinal>& CRS_colind);
88 
89 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
90 void
91 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
92  const colind_array_type& CRS_colind,
93  const vals_array_type& CRS_vals);
94 
95 template<typename rowptr_array_type, typename colind_array_type>
96 void
97 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
98  const colind_array_type& CRS_colind);
99 
104 template<typename Scalar, typename Ordinal>
105 void
106 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
107  const Teuchos::ArrayView<Ordinal>& CRS_colind,
108  const Teuchos::ArrayView<Scalar>& CRS_vals);
109 
110 template<typename Ordinal>
111 void
112 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
113  const Teuchos::ArrayView<Ordinal>& CRS_colind);
114 
115 template<class rowptr_view_type, class colind_view_type, class vals_view_type>
116 void
117 sortAndMergeCrsEntries (const rowptr_view_type& CRS_rowptr,
118  const colind_view_type& CRS_colind,
119  const vals_view_type& CRS_vals);
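// Example: sortAndMergeCrsEntries additionally sums values of repeated column
// indices within a row and compacts the CRS arrays in place. A hedged sketch
// with hypothetical data (not part of this header):
//
//   std::vector<size_t> rowptr = {0, 3, 5};
//   std::vector<int>    colind = {2, 0, 2,  1, 1};
//   std::vector<double> vals   = {1., 5., 3.,  2., 4.};
//   Tpetra::Import_Util::sortAndMergeCrsEntries (
//     Teuchos::ArrayView<size_t> (rowptr.data (), rowptr.size ()),
//     Teuchos::ArrayView<int>    (colind.data (), colind.size ()),
//     Teuchos::ArrayView<double> (vals.data (),   vals.size ()));
//   // Afterwards: rowptr == {0, 2, 3}; the first three entries of colind are
//   // {0, 2, 1} and the first three entries of vals are {5., 4., 6.}.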
120 
136 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
137 void
138 lowCommunicationMakeColMapAndReindex (
139  const Teuchos::ArrayView<const size_t> &rowptr,
140  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
141  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
142  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
143  const Teuchos::ArrayView<const int> &owningPIDs,
144  Teuchos::Array<int> &remotePIDs,
145  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
146 
151 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
152 void
153 lowCommunicationMakeColMapAndReindex (
154  const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
155  const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
156  const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
157  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
158  const Teuchos::ArrayView<const int> &owningPIDs,
159  Teuchos::Array<int> &remotePIDs,
160  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
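// Typical call pattern for either overload above (a hedged sketch; rowptr,
// colind_LID, colind_GID, owningPIDs, and domainMap are hypothetical inputs
// describing an imported matrix, not names defined in this file):
//
//   Teuchos::Array<int> remotePIDs;
//   Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > colMap;
//   Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex (
//     rowptr, colind_LID, colind_GID, domainMap, owningPIDs,
//     remotePIDs, colMap);
//   // On return, colind_LID holds local column indices consistent with colMap,
//   // and remotePIDs holds one entry per remote column GID, sorted by owning PID.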
161 
175  template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
176  void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
177  bool useReverseModeForOwnership,
178  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
179  bool useReverseModeForMigration,
180  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> & owningPIDs);
181 
182 } // namespace Import_Util
183 } // namespace Tpetra
184 
185 
186 //
187 // Implementations
188 //
189 
190 namespace Tpetra {
191 namespace Import_Util {
192 
193 
194 template<typename PID, typename GlobalOrdinal>
195 bool sort_PID_then_GID(const std::pair<PID,GlobalOrdinal> &a,
196  const std::pair<PID,GlobalOrdinal> &b)
197 {
198  if(a.first!=b.first)
199  return (a.first < b.first);
200  return (a.second < b.second);
201 }
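// sort_PID_then_GID is intended as a strict-weak-ordering predicate, e.g. for
// std::sort. A small sketch with hypothetical data:
//
//   std::vector<std::pair<int, long long> > pairs = { {2, 7}, {0, 9}, {2, 3} };
//   std::sort (pairs.begin (), pairs.end (),
//              Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
//   // Result: (0,9), (2,3), (2,7) -- grouped by PID, then ordered by GID.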
202 
203 template<typename PID,
204  typename GlobalOrdinal,
205  typename LocalOrdinal>
206 bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &a,
207  const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &b)
208 {
209  if(a.first!=b.first)
210  return a.first < b.first;
211  else
212  return (a.second.first < b.second.first);
213 }
214 
215 template<typename Scalar,
216  typename LocalOrdinal,
217  typename GlobalOrdinal,
218  typename Node>
219 void
220 reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
221  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type & rowptr,
222  const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type & colind,
223  const Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& RowTransfer,
224  Teuchos::RCP<const Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > MyImporter,
225  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MyDomainMap,
226  Teuchos::ArrayRCP<int>& type3PIDs,
227  Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
228  Teuchos::RCP<const Teuchos::Comm<int> >& rcomm)
229 {
230 #ifdef HAVE_TPETRACORE_MPI
231  using Teuchos::TimeMonitor;
232  using ::Tpetra::Details::Behavior;
233  typedef LocalOrdinal LO;
234  typedef GlobalOrdinal GO;
235  typedef std::pair<GO,GO> pidgidpair_t;
236  using Teuchos::RCP;
237  const std::string prefix {" Import_Util2::ReverseND:: "};
238  const std::string label ("IU2::Neighbor");
239 
240  // There can be no neighbor discovery if you don't have an importer
241  if(MyImporter.is_null()) return;
242 
243  std::ostringstream errstr;
244  bool error = false;
245  auto const comm = MyDomainMap->getComm();
246 
247  MPI_Comm rawComm = getRawMpiComm(*comm);
248  const int MyPID = rcomm->getRank ();
249 
250  // Things related to messages I am sending in forward mode (RowTransfer)
251  // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
252  auto ExportPIDs = RowTransfer.getExportPIDs();
253  auto ExportLIDs = RowTransfer.getExportLIDs();
254  auto NumExportLIDs = RowTransfer.getNumExportIDs();
255 
256  Distributor & Distor = MyImporter->getDistributor();
257  const size_t NumRecvs = Distor.getNumReceives();
258  const size_t NumSends = Distor.getNumSends();
259  auto RemoteLIDs = MyImporter->getRemoteLIDs();
260  auto const ProcsFrom = Distor.getProcsFrom();
261  auto const ProcsTo = Distor.getProcsTo();
262 
263  auto LengthsFrom = Distor.getLengthsFrom();
264  auto MyColMap = SourceMatrix.getColMap();
265  const size_t numCols = MyColMap->getLocalNumElements ();
266  RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > target = MyImporter->getTargetMap();
267 
268  // Get the owning pids in a special way, s.t. for each remote column LID,
269  // ProcsFrom[RemotePIDOrder[lid]] is the proc that owns that column.
270  Teuchos::Array<int> RemotePIDOrder(numCols,-1);
271 
272  // For each remote ID, record index into ProcsFrom, who owns it.
273  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
274  for (size_t k = 0; k < LengthsFrom[i]; ++k) {
275  const int pid = ProcsFrom[i];
276  if (pid != MyPID) {
277  RemotePIDOrder[RemoteLIDs[j]] = i;
278  }
279  j++;
280  }
281  }
282 
283  // Step One: Start tacking the (GID,PID) pairs on the std sets
284  //
285  // For each index in ProcsFrom, we will insert into a set of (PID,
286  // GID) pairs, in order to build a list of such pairs for each of
287  // those processes. Since this is building a reverse, we will send
288  // to these processes.
289  Teuchos::Array<int> ReverseSendSizes(NumRecvs,0);
290  // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
291  Teuchos::Array< Teuchos::ArrayRCP<pidgidpair_t > > RSB(NumRecvs);
292 
293  {
294 #ifdef HAVE_TPETRA_MMM_TIMINGS
295  TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
296 #endif
297 
298  // 25 Jul 2018: CBL
299  // todo: std::unordered_set (hash table),
300  // with an adequate prereservation ("bucket count").
301  // An unordered_set has to have a custom hasher for the pid/gid pair.
302  // However, when pidsets is copied to RSB, it will be in key
303  // order _not_ in pid,gid order (unlike std::set).
304  // Implement this with a reserve, and time BOTH building pidsets
305  // _and_ the sort after the receive. Even if unordered_set saves
306  // time, if it causes the sort to be longer, it's not a win.
307 
308  Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
309  {
310 #ifdef HAVE_TPETRA_MMM_TIMINGS
311  TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
312 #endif
313  for(size_t i=0; i < NumExportLIDs; i++) {
314  LO lid = ExportLIDs[i];
315  GO exp_pid = ExportPIDs[i];
316  for(auto j=rowptr[lid]; j<rowptr[lid+1]; j++){
317  int pid_order = RemotePIDOrder[colind[j]];
318  if(pid_order!=-1) {
319  GO gid = MyColMap->getGlobalElement(colind[j]); //Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
320  auto tpair = pidgidpair_t(exp_pid,gid);
321  pidsets[pid_order].insert(pidsets[pid_order].end(),tpair);
322  }
323  }
324  }
325  }
326 
327  {
328 #ifdef HAVE_TPETRA_MMM_TIMINGS
329  TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
330 #endif
331  int jj = 0;
332  for(auto &&ps : pidsets) {
333  auto s = ps.size();
334  RSB[jj] = Teuchos::arcp(new pidgidpair_t[ s ],0, s ,true);
335  std::copy(ps.begin(),ps.end(),RSB[jj]);
336  ReverseSendSizes[jj]=s;
337  ++jj;
338  }
339  }
340  } // end of set based packing.
341 
342  Teuchos::Array<int> ReverseRecvSizes(NumSends,-1);
343  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size()+ProcsTo.size(), MPI_REQUEST_NULL);
344  // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
345  const int mpi_tag_base_ = 3;
346 
347  int mpireq_idx=0;
348  for(int i=0;i<ProcsTo.size();++i) {
349  int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
350  int * thisrecv = (int *) (&ReverseRecvSizes[i]);
351  MPI_Request rawRequest = MPI_REQUEST_NULL;
352  MPI_Irecv(const_cast<int*>(thisrecv),
353  1,
354  MPI_INT,
355  ProcsTo[i],
356  Rec_Tag,
357  rawComm,
358  &rawRequest);
359  rawBreq[mpireq_idx++]=rawRequest;
360  }
361  for(int i=0;i<ProcsFrom.size();++i) {
362  int Send_Tag = mpi_tag_base_ + MyPID;
363  int * mysend = ( int *)(&ReverseSendSizes[i]);
364  MPI_Request rawRequest = MPI_REQUEST_NULL;
365  MPI_Isend(mysend,
366  1,
367  MPI_INT,
368  ProcsFrom[i],
369  Send_Tag,
370  rawComm,
371  &rawRequest);
372  rawBreq[mpireq_idx++]=rawRequest;
373  }
374  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
375 #ifdef HAVE_TPETRA_DEBUG
376  const int err1 =
377 #endif
378  MPI_Waitall (rawBreq.size(), rawBreq.getRawPtr(),
379  rawBstatus.getRawPtr());
380 
381 
382 #ifdef HAVE_TPETRA_DEBUG
383  if(err1) {
384  errstr <<MyPID<< "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
385  error=true;
386  std::cerr<<errstr.str()<<std::flush;
387  }
388 #endif
389 
390  int totalexportpairrecsize = 0;
391  for (size_t i = 0; i < NumSends; ++i) {
392  totalexportpairrecsize += ReverseRecvSizes[i];
393 #ifdef HAVE_TPETRA_DEBUG
394  if(ReverseRecvSizes[i]<0) {
395  errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size "<<ReverseRecvSizes[i]<<std::endl;
396  error=true;
397  }
398 #endif
399  }
400  Teuchos::ArrayRCP<pidgidpair_t >AllReverseRecv= Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize],0,totalexportpairrecsize,true);
401  int offset = 0;
402  mpireq_idx=0;
403  for(int i=0;i<ProcsTo.size();++i) {
404  int recv_data_size = ReverseRecvSizes[i]*2;
405  int recvData_MPI_Tag = mpi_tag_base_*2 + ProcsTo[i];
406  MPI_Request rawRequest = MPI_REQUEST_NULL;
407  GO * rec_bptr = (GO*) (&AllReverseRecv[offset]);
408  offset+=ReverseRecvSizes[i];
409  MPI_Irecv(rec_bptr,
410  recv_data_size,
411  ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
412  ProcsTo[i],
413  recvData_MPI_Tag,
414  rawComm,
415  &rawRequest);
416  rawBreq[mpireq_idx++]=rawRequest;
417  }
418  for(int ii=0;ii<ProcsFrom.size();++ii) {
419  GO * send_bptr = (GO*) (RSB[ii].getRawPtr());
420  MPI_Request rawSequest = MPI_REQUEST_NULL;
421  int send_data_size = ReverseSendSizes[ii]*2; // 2 == count of pair
422  int sendData_MPI_Tag = mpi_tag_base_*2+MyPID;
423  MPI_Isend(send_bptr,
424  send_data_size,
425  ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
426  ProcsFrom[ii],
427  sendData_MPI_Tag,
428  rawComm,
429  &rawSequest);
430 
431  rawBreq[mpireq_idx++]=rawSequest;
432  }
433 #ifdef HAVE_TPETRA_DEBUG
434  const int err =
435 #endif
436  MPI_Waitall (rawBreq.size(),
437  rawBreq.getRawPtr(),
438  rawBstatus.getRawPtr());
439 #ifdef HAVE_TPETRA_DEBUG
440  if(err) {
441  errstr <<MyPID<< "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
442  error=true;
443  std::cerr<<errstr.str()<<std::flush;
444  }
445 #endif
446  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
447 
448  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
449 // don't resize to remove non-unique, just use the end-of-unique iterator
450  if(AllReverseRecv.begin() == newEndOfPairs) return;
451  int ARRsize = std::distance(AllReverseRecv.begin(),newEndOfPairs);
452  auto rPIDs = Teuchos::arcp(new int[ARRsize],0,ARRsize,true);
453  auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize],0,ARRsize,true);
454 
455  int tsize=0;
456  for(auto itr = AllReverseRecv.begin(); itr!=newEndOfPairs; ++itr ) {
457  if((int)(itr->first) != MyPID) {
458  rPIDs[tsize]=(int)itr->first;
459  LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
460  rLIDs[tsize]=lid;
461  tsize++;
462  }
463  }
464 
465  type3PIDs=rPIDs.persistingView(0,tsize);
466  type3LIDs=rLIDs.persistingView(0,tsize);
467 
468  if(error){
469  std::cerr<<errstr.str()<<std::flush;
470  comm->barrier();
471  comm->barrier();
472  comm->barrier();
473  MPI_Abort (MPI_COMM_WORLD, -1);
474  }
475 #endif
476 }
477 
478 // Note: This should get merged with the other Tpetra sort routines eventually.
479 template<typename Scalar, typename Ordinal>
480 void
481 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
482  const Teuchos::ArrayView<Ordinal> & CRS_colind,
483  const Teuchos::ArrayView<Scalar> &CRS_vals)
484 {
485  // For each row, sort column entries from smallest to largest.
486  // Use shell sort. Stable sort so it is fast if indices are already sorted.
487  // Code copied from Epetra_CrsMatrix::SortEntries()
488  size_t NumRows = CRS_rowptr.size()-1;
489  size_t nnz = CRS_colind.size();
490 
491  const bool permute_values_array = CRS_vals.size() > 0;
492 
493  for(size_t i = 0; i < NumRows; i++){
494  size_t start=CRS_rowptr[i];
495  if(start >= nnz) continue;
496 
497  size_t NumEntries = CRS_rowptr[i+1] - start;
498  Teuchos::ArrayRCP<Scalar> locValues;
499  if (permute_values_array)
500  locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
501  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
502 
503  Ordinal n = NumEntries;
504  Ordinal m = 1;
505  while (m<n) m = m*3+1;
506  m /= 3;
507 
508  while(m > 0) {
509  Ordinal max = n - m;
510  for(Ordinal j = 0; j < max; j++) {
511  for(Ordinal k = j; k >= 0; k-=m) {
512  if(locIndices[k+m] >= locIndices[k])
513  break;
514  if (permute_values_array) {
515  Scalar dtemp = locValues[k+m];
516  locValues[k+m] = locValues[k];
517  locValues[k] = dtemp;
518  }
519  Ordinal itemp = locIndices[k+m];
520  locIndices[k+m] = locIndices[k];
521  locIndices[k] = itemp;
522  }
523  }
524  m = m/3;
525  }
526  }
527 }
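// The shell sort above uses the gap sequence 1, 4, 13, 40, ... (m = 3*m + 1).
// For example, with NumEntries = 10 the while loop grows m as 1 -> 4 -> 13,
// then m /= 3 leaves a starting gap of 4, so the passes use gaps 4 and 1
// (the final pass is a plain insertion sort), which is cheap when the row is
// already nearly sorted.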
528 
529 template<typename Ordinal>
530 void
531 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
532  const Teuchos::ArrayView<Ordinal> & CRS_colind)
533 {
534  // Generate dummy values array
535  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
536  sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
537 }
538 
539 namespace Impl {
540 
541 template<class RowOffsetsType, class ColumnIndicesType, class ValuesType>
542 class SortCrsEntries {
543 private:
544  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
545  typedef typename ValuesType::non_const_value_type scalar_type;
546 
547 public:
548  SortCrsEntries (const RowOffsetsType& ptr,
549  const ColumnIndicesType& ind,
550  const ValuesType& val) :
551  ptr_ (ptr),
552  ind_ (ind),
553  val_ (val)
554  {
555  static_assert (std::is_signed<ordinal_type>::value, "The type of each "
556  "column index -- that is, the type of each entry of ind "
557  "-- must be signed in order for this functor to work.");
558  }
559 
560  KOKKOS_FUNCTION void operator() (const size_t i) const
561  {
562  const size_t nnz = ind_.extent (0);
563  const size_t start = ptr_(i);
564  const bool permute_values_array = val_.extent(0) > 0;
565 
566  if (start < nnz) {
567  const size_t NumEntries = ptr_(i+1) - start;
568 
569  const ordinal_type n = static_cast<ordinal_type> (NumEntries);
570  ordinal_type m = 1;
571  while (m<n) m = m*3+1;
572  m /= 3;
573 
574  while (m > 0) {
575  ordinal_type max = n - m;
576  for (ordinal_type j = 0; j < max; j++) {
577  for (ordinal_type k = j; k >= 0; k -= m) {
578  const size_t sk = start+k;
579  if (ind_(sk+m) >= ind_(sk)) {
580  break;
581  }
582  if (permute_values_array) {
583  const scalar_type dtemp = val_(sk+m);
584  val_(sk+m) = val_(sk);
585  val_(sk) = dtemp;
586  }
587  const ordinal_type itemp = ind_(sk+m);
588  ind_(sk+m) = ind_(sk);
589  ind_(sk) = itemp;
590  }
591  }
592  m = m/3;
593  }
594  }
595  }
596 
597  static void
598  sortCrsEntries (const RowOffsetsType& ptr,
599  const ColumnIndicesType& ind,
600  const ValuesType& val)
601  {
602  // For each row, sort column entries from smallest to largest.
603  // Use shell sort. Stable sort so it is fast if indices are already sorted.
604  // Code copied from Epetra_CrsMatrix::SortEntries()
605  // NOTE: This should not be taken as a particularly efficient way to sort
606  // rows of matrices in parallel. But it is correct, so that's something.
607  if (ptr.extent (0) == 0) {
608  return; // no rows, so nothing to sort
609  }
610  const size_t NumRows = ptr.extent (0) - 1;
611 
612  typedef size_t index_type; // what this function was using; not my choice
613  typedef typename ValuesType::execution_space execution_space;
614  // Specify RangePolicy explicitly, in order to use correct execution
615  // space. See #1345.
616  typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
617  Kokkos::parallel_for ("sortCrsEntries", range_type (0, NumRows),
618  SortCrsEntries (ptr, ind, val));
619  }
620 
621 private:
622  RowOffsetsType ptr_;
623  ColumnIndicesType ind_;
624  ValuesType val_;
625 };
626 
627 } // namespace Impl
628 
629 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
630 void
631 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
632  const colind_array_type& CRS_colind,
633  const vals_array_type& CRS_vals)
634 {
635  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
636  vals_array_type>::sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
637 }
638 
639 template<typename rowptr_array_type, typename colind_array_type>
640 void
641 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
642  const colind_array_type& CRS_colind)
643 {
644  // Generate dummy values array
645  typedef typename colind_array_type::execution_space execution_space;
646  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
647  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
648  scalar_view_type CRS_vals;
649  sortCrsEntries<rowptr_array_type, colind_array_type,
650  scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
651 }
652 
653 // Note: This should get merged with the other Tpetra sort routines eventually.
654 template<typename Scalar, typename Ordinal>
655 void
656 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
657  const Teuchos::ArrayView<Ordinal> & CRS_colind,
658  const Teuchos::ArrayView<Scalar> &CRS_vals)
659 {
660  // For each row, sort column entries from smallest to largest,
661  // merging duplicate column ids by adding their values. Use shell
662  // sort. Stable sort so it is fast if indices are already sorted.
663  // Code copied from Epetra_CrsMatrix::SortEntries()
664 
665  if (CRS_rowptr.size () == 0) {
666  return; // no rows, so nothing to sort
667  }
668  const size_t NumRows = CRS_rowptr.size () - 1;
669  const size_t nnz = CRS_colind.size ();
670  size_t new_curr = CRS_rowptr[0];
671  size_t old_curr = CRS_rowptr[0];
672 
673  const bool permute_values_array = CRS_vals.size() > 0;
674 
675  for(size_t i = 0; i < NumRows; i++){
676  const size_t old_rowptr_i=CRS_rowptr[i];
677  CRS_rowptr[i] = old_curr;
678  if(old_rowptr_i >= nnz) continue;
679 
680  size_t NumEntries = CRS_rowptr[i+1] - old_rowptr_i;
681  Teuchos::ArrayRCP<Scalar> locValues;
682  if (permute_values_array)
683  locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
684  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
685 
686  // Sort phase
687  Ordinal n = NumEntries;
688  Ordinal m = n/2;
689 
690  while(m > 0) {
691  Ordinal max = n - m;
692  for(Ordinal j = 0; j < max; j++) {
693  for(Ordinal k = j; k >= 0; k-=m) {
694  if(locIndices[k+m] >= locIndices[k])
695  break;
696  if (permute_values_array) {
697  Scalar dtemp = locValues[k+m];
698  locValues[k+m] = locValues[k];
699  locValues[k] = dtemp;
700  }
701  Ordinal itemp = locIndices[k+m];
702  locIndices[k+m] = locIndices[k];
703  locIndices[k] = itemp;
704  }
705  }
706  m = m/2;
707  }
708 
709  // Merge & shrink
710  for(size_t j=old_rowptr_i; j < CRS_rowptr[i+1]; j++) {
711  if(j > old_rowptr_i && CRS_colind[j]==CRS_colind[new_curr-1]) {
712  if (permute_values_array) CRS_vals[new_curr-1] += CRS_vals[j];
713  }
714  else if(new_curr==j) {
715  new_curr++;
716  }
717  else {
718  CRS_colind[new_curr] = CRS_colind[j];
719  if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
720  new_curr++;
721  }
722  }
723  old_curr=new_curr;
724  }
725 
726  CRS_rowptr[NumRows] = new_curr;
727 }
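// The merge & shrink pass above writes the surviving entries of each (already
// sorted) row back into the front of CRS_colind / CRS_vals, so on return
// CRS_rowptr[i] points at the compacted start of row i and CRS_rowptr[NumRows]
// is the new, possibly smaller, number of stored entries. For instance, a row
// that originally held column ids {0, 2, 2} with values {5, 1, 3} is rewritten
// as {0, 2} with values {5, 4}, and every later row shifts forward accordingly.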
728 
729 template<typename Ordinal>
730 void
731 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
732  const Teuchos::ArrayView<Ordinal> & CRS_colind)
733 {
734  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
735  return sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
736 }
737 
738 template<class rowptr_view_type, class colind_view_type, class vals_view_type>
739 void
740 sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
741  colind_view_type& CRS_colind,
742  vals_view_type& CRS_vals)
743 {
744  using execution_space = typename vals_view_type::execution_space;
745 
746  auto CRS_rowptr_in = CRS_rowptr;
747  auto CRS_colind_in = CRS_colind;
748  auto CRS_vals_in = CRS_vals;
749 
750  KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
751  colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
752  CRS_rowptr, CRS_colind, CRS_vals);
753 }
754 
755 
756 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
757 void
758 lowCommunicationMakeColMapAndReindexSerial (const Teuchos::ArrayView<const size_t> &rowptr,
759  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
760  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
761  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
762  const Teuchos::ArrayView<const int> &owningPIDs,
763  Teuchos::Array<int> &remotePIDs,
764  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
765 {
766  using Teuchos::rcp;
767  typedef LocalOrdinal LO;
768  typedef GlobalOrdinal GO;
769  typedef Tpetra::global_size_t GST;
770  typedef Tpetra::Map<LO, GO, Node> map_type;
771  const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
772 
773  // The domainMap is an RCP because there is a shortcut for a
774  // (common) special case to return the columnMap = domainMap.
775  const map_type& domainMap = *domainMapRCP;
776 
777  // Scan all column indices and sort into two groups:
778  // Local: those whose GID matches a GID of the domain map on this processor and
779  // Remote: All others.
780  const size_t numDomainElements = domainMap.getLocalNumElements ();
781  Teuchos::Array<bool> LocalGIDs;
782  if (numDomainElements > 0) {
783  LocalGIDs.resize (numDomainElements, false); // Assume domain GIDs are not local
784  }
785 
786  // In principle it is good to have RemoteGIDs and RemoteGIDList be as
787  // long as the number of remote GIDs on this processor, but this
788  // would require two passes through the column IDs, so we make it
789  // the max of 100 and the number of block rows.
790  //
791  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
792  // most INT_MAX entries, but it's possible to have more rows than
793  // that (if size_t is 64 bits and int is 32 bits).
794  const size_t numMyRows = rowptr.size () - 1;
795  const int hashsize = std::max (static_cast<int> (numMyRows), 100);
796 
797  Tpetra::Details::HashTable<GO, LO> RemoteGIDs (hashsize);
798  Teuchos::Array<GO> RemoteGIDList;
799  RemoteGIDList.reserve (hashsize);
800  Teuchos::Array<int> PIDList;
801  PIDList.reserve (hashsize);
802 
803  // Here we start using the *LocalOrdinal* colind_LID array. This is
804  // safe even if both columnIndices arrays are actually the same
805  // (because LocalOrdinal==GO). For *local* GIDs, set
806  // colind_LID with their LID in the domainMap. For *remote*
807  // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
808  // before the increment of the remote count. These numberings will
809  // be separate because no local LID is greater than
810  // numDomainElements.
811 
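// For example (hypothetical numbers): with numDomainElements = 5, a column GID
// found in the domain Map with LID 3 gets colind_LID[j] = 3, while the third
// distinct *remote* GID encountered gets colind_LID[j] = 5 + 2 = 7. The
// reindexing loop at the end of this function maps both ranges onto the final
// column Map's ordering.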
812  size_t NumLocalColGIDs = 0;
813  LO NumRemoteColGIDs = 0;
814  for (size_t i = 0; i < numMyRows; ++i) {
815  for(size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
816  const GO GID = colind_GID[j];
817  // Check if GID matches a row GID
818  const LO LID = domainMap.getLocalElement (GID);
819  if(LID != -1) {
820  const bool alreadyFound = LocalGIDs[LID];
821  if (! alreadyFound) {
822  LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
823  NumLocalColGIDs++;
824  }
825  colind_LID[j] = LID;
826  }
827  else {
828  const LO hash_value = RemoteGIDs.get (GID);
829  if (hash_value == -1) { // This means its a new remote GID
830  const int PID = owningPIDs[j];
831  TEUCHOS_TEST_FOR_EXCEPTION(
832  PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
833  "PID is owned.");
834  colind_LID[j] = static_cast<LO> (numDomainElements + NumRemoteColGIDs);
835  RemoteGIDs.add (GID, NumRemoteColGIDs);
836  RemoteGIDList.push_back (GID);
837  PIDList.push_back (PID);
838  NumRemoteColGIDs++;
839  }
840  else {
841  colind_LID[j] = static_cast<LO> (numDomainElements + hash_value);
842  }
843  }
844  }
845  }
846 
847  // Possible short-circuit: If all domain map GIDs are present as
848  // column indices, then set ColMap=domainMap and quit.
849  if (domainMap.getComm ()->getSize () == 1) {
850  // Sanity check: When there is only one process, there can be no
851  // remoteGIDs.
852  TEUCHOS_TEST_FOR_EXCEPTION(
853  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
854  "process in the domain Map's communicator, which means that there are no "
855  "\"remote\" indices. Nevertheless, some column indices are not in the "
856  "domain Map.");
857  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
858  // In this case, we just use the domainMap's indices, which is,
859  // not coincidentally, what we clobbered colind with up above
860  // anyway. No further reindexing is needed.
861  colMap = domainMapRCP;
862  return;
863  }
864  }
865 
866  // Now build the array containing column GIDs
867  // Build back end, containing remote GIDs, first
868  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
869  Teuchos::Array<GO> ColIndices;
870  GO* RemoteColIndices = NULL;
871  if (numMyCols > 0) {
872  ColIndices.resize (numMyCols);
873  if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
874  RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
875  }
876  }
877 
878  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
879  RemoteColIndices[i] = RemoteGIDList[i];
880  }
881 
882  // Build permute array for *remote* reindexing.
883  Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
884  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
885  RemotePermuteIDs[i]=i;
886  }
887 
888  // Sort External column indices so that all columns coming from a
889  // given remote processor are contiguous. This is a sort with two
890  // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
891  Tpetra::sort3 (PIDList.begin (), PIDList.end (),
892  ColIndices.begin () + NumLocalColGIDs,
893  RemotePermuteIDs.begin ());
894 
895  // Stash the RemotePIDs.
896  //
897  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
898  // we'd call it here.
899  remotePIDs = PIDList;
900 
901  // Sort external column indices so that columns from a given remote
902  // processor are not only contiguous but also in ascending
903  // order. NOTE: I don't know if the number of externals associated
904  // with a given remote processor is known at this point ... so I
905  // count them here.
906 
907  // NTS: Only sort the RemoteColIndices this time...
908  LO StartCurrent = 0, StartNext = 1;
909  while (StartNext < NumRemoteColGIDs) {
910  if (PIDList[StartNext]==PIDList[StartNext-1]) {
911  StartNext++;
912  }
913  else {
914  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
915  ColIndices.begin () + NumLocalColGIDs + StartNext,
916  RemotePermuteIDs.begin () + StartCurrent);
917  StartCurrent = StartNext;
918  StartNext++;
919  }
920  }
921  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
922  ColIndices.begin () + NumLocalColGIDs + StartNext,
923  RemotePermuteIDs.begin () + StartCurrent);
924 
925  // Reverse the permutation to get the information we actually care about
926  Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
927  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
928  ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
929  }
930 
931  // Build permute array for *local* reindexing.
932  bool use_local_permute = false;
933  Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
934 
935  // Now fill front end. Two cases:
936  //
937  // (1) If the number of Local column GIDs is the same as the number
938  // of Local domain GIDs, we can simply read the domain GIDs into
939  // the front part of ColIndices, otherwise
940  //
941  // (2) We step through the GIDs of the domainMap, checking to see if
942  // each domain GID is a column GID. we want to do this to
943  // maintain a consistent ordering of GIDs between the columns
944  // and the domain.
945  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
946  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
947  if (NumLocalColGIDs > 0) {
948  // Load Global Indices into first numMyCols elements column GID list
949  std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
950  ColIndices.begin ());
951  }
952  }
953  else {
954  LO NumLocalAgain = 0;
955  use_local_permute = true;
956  for (size_t i = 0; i < numDomainElements; ++i) {
957  if (LocalGIDs[i]) {
958  LocalPermuteIDs[i] = NumLocalAgain;
959  ColIndices[NumLocalAgain++] = domainGlobalElements[i];
960  }
961  }
962  TEUCHOS_TEST_FOR_EXCEPTION(
963  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
964  std::runtime_error, prefix << "Local ID count test failed.");
965  }
966 
967  // Make column Map
968  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
969  colMap = rcp (new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
970  domainMap.getComm ()));
971 
972  // Low-cost reindex of the matrix
973  for (size_t i = 0; i < numMyRows; ++i) {
974  for (size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
975  const LO ID = colind_LID[j];
976  if (static_cast<size_t> (ID) < numDomainElements) {
977  if (use_local_permute) {
978  colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
979  }
980  // In the case where use_local_permute==false, we just copy
981  // the DomainMap's ordering, which it so happens is what we
982  // put in colind_LID to begin with.
983  }
984  else {
985  colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
986  }
987  }
988  }
989 }
990 
991 
992 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
993 void
994 lowCommunicationMakeColMapAndReindex (
995  const Teuchos::ArrayView<const size_t> &rowptr,
996  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
997  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
998  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
999  const Teuchos::ArrayView<const int> &owningPIDs,
1000  Teuchos::Array<int> &remotePIDs,
1001  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
1002 {
1003  using Teuchos::rcp;
1004  typedef LocalOrdinal LO;
1005  typedef GlobalOrdinal GO;
1006  typedef Tpetra::global_size_t GST;
1007  typedef Tpetra::Map<LO, GO, Node> map_type;
1008  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1009 
1010  typedef typename Node::device_type DT;
1011  using execution_space = typename DT::execution_space;
1012  execution_space exec;
1013  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1014  typedef typename map_type::local_map_type local_map_type;
1015 
1016  // Create device mirror and host mirror views from function parameters
1017  // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
1018  auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
1019  auto rowptr_view = Details::create_mirror_view_from_raw_host_array(exec, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
1020  auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
1021  auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(exec, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
1022 
1023  typename decltype(colind_LID_view)::HostMirror colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
1024  typename decltype(colind_GID_view)::HostMirror colind_GID_host(colind_GID.getRawPtr(), colind_GID.size());
1025 
1026  Kokkos::deep_copy(colind_LID_view, colind_LID_host);
1027  Kokkos::deep_copy(colind_GID_view, colind_GID_host);
1028 
1029  // The domainMap is an RCP because there is a shortcut for a
1030  // (common) special case to return the columnMap = domainMap.
1031  const map_type& domainMap = *domainMapRCP;
1032 
1033  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID.size());
1034  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID.size());
1035 
1036  const size_t numMyRows = rowptr.size () - 1;
1037  local_map_type domainMap_local = domainMap.getLocalMap();
1038 
1039  const size_t numDomainElements = domainMap.getLocalNumElements ();
1040  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1041  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1042 
1043  size_t NumLocalColGIDs = 0;
1044 
1045  // Scan all column indices and sort into two groups:
1046  // Local: those whose GID matches a GID of the domain map on this processor and
1047  // Remote: All others.
1048  // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1049  Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1050  const int i = member.league_rank();
1051  size_t NumLocalColGIDs_temp = 0;
1052  size_t rowptr_start = rowptr_view[i];
1053  size_t rowptr_end = rowptr_view[i+1];
1054  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1055  const GO GID = colind_GID_view[j];
1056  // Check if GID matches a row GID in local domain map
1057  const LO LID = domainMap_local.getLocalElement (GID);
1058  if (LID != -1) {
1059  auto outcome = LocalGIDs_view_map.insert(LID);
1060  // Fresh insert
1061  if(outcome.success()) {
1062  LocalGIDs_view[LID] = true;
1063  innerUpdate++;
1064  }
1065  }
1066  else {
1067  const int PID = owningPIDs_view[j];
1068  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1069  if(outcome.success() && PID == -1) {
1070  Kokkos::abort("Cannot figure out if ID is owned.\n");
1071  }
1072  }
1073  }, NumLocalColGIDs_temp);
1074  if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1075  }, NumLocalColGIDs);
1076 
1077  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1078 
1079  Kokkos::View<int*, DT> PIDList_view("PIDList", NumRemoteColGIDs);
1080  auto PIDList_host = Kokkos::create_mirror_view(PIDList_view);
1081 
1082  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1083  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1084 
1085  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1086  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1087  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1088  if(final && RemoteGIDs_view_map.valid_at(i)) {
1089  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1090  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1091  }
1092  if(RemoteGIDs_view_map.valid_at(i)) {
1093  update += 1;
1094  }
1095  });
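// Sketch of what the scan above produces (hypothetical hash-table layout): if
// RemoteGIDs_view_map has capacity 8 and only slots 1, 4, and 6 hold valid
// entries, the running count "update" is 0,0,1,1,1,2,2,3 at slots 0..7, so the
// keys/values from slots 1, 4, and 6 land contiguously at indices 0, 1, and 2
// of RemoteGIDList_view / PIDList_view.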
1096 
1097  // Possible short-circuit: If all domain map GIDs are present as
1098  // column indices, then set ColMap=domainMap and quit.
1099  if (domainMap.getComm ()->getSize () == 1) {
1100  // Sanity check: When there is only one process, there can be no
1101  // remoteGIDs.
1102  TEUCHOS_TEST_FOR_EXCEPTION(
1103  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1104  "process in the domain Map's communicator, which means that there are no "
1105  "\"remote\" indices. Nevertheless, some column indices are not in the "
1106  "domain Map.");
1107  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1108  // In this case, we just use the domainMap's indices, which is,
1109  // not coincidentally, what we clobbered colind with up above
1110  // anyway. No further reindexing is needed.
1111  colMap = domainMapRCP;
1112 
1113  // Fill out local colMap (which should only contain local GIDs)
1114  auto localColMap = colMap->getLocalMap();
1115  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1116  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1117  });
1118  Kokkos::deep_copy(execution_space(), colind_LID_host, colind_LID_view);
1119  return;
1120  }
1121  }
1122 
1123  // Now build the array containing column GIDs
1124  // Build back end, containing remote GIDs, first
1125  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1126  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1127 
1128  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1129  if(NumRemoteColGIDs > 0) {
1130  if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1131  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1132  ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1133  });
1134  }
1135 
1136  // Find the largest PID for bin sorting purposes
1137  int PID_max = 0;
1138  Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_host.size()), KOKKOS_LAMBDA(const int i, int& max) {
1139  if(max < PIDList_view[i]) max = PIDList_view[i];
1140  }, Kokkos::Max<int>(PID_max));
1141 
1142  using KeyViewTypePID = decltype(PIDList_view);
1143  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1144 
1145  // Make a subview of ColIndices for remote GID sorting
1146  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1147 
1148  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1149  BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1150 
1151  // Sort External column indices so that all columns coming from a
1152  // given remote processor are contiguous. This is a sort with one
1153  // auxiliary array: RemoteColIndices
1154  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1155  bin_sort2.create_permute_vector(exec);
1156  bin_sort2.sort(exec, PIDList_view);
1157  bin_sort2.sort(exec, ColIndices_subview);
1158 
1159  // Deep copy back from device to host
1160  Kokkos::deep_copy(execution_space(), PIDList_host, PIDList_view);
1161 
1162  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1163  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
1164  // we'd call it here.
1165 
1166  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1167  for(LO i = 0; i < NumRemoteColGIDs; ++i) {
1168  PIDList[i] = PIDList_host[i];
1169  }
1170 
1171  remotePIDs = PIDList;
1172 
1173  // Sort external column indices so that columns from a given remote
1174  // processor are not only contiguous but also in ascending
1175  // order. NOTE: I don't know if the number of externals associated
1176  // with a given remote processor is known at this point ... so I
1177  // count them here.
1178  LO StartCurrent = 0, StartNext = 1;
1179  while(StartNext < NumRemoteColGIDs) {
1180  if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1181  StartNext++;
1182  }
1183  else {
1184  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1185  StartCurrent = StartNext;
1186  StartNext++;
1187  }
1188  }
1189 
1190  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1191  }
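// Worked example of the two sorting passes above (hypothetical data): remote
// GIDs {17, 42, 5} owned by PIDs {3, 1, 3} are bin-sorted by PID so that the
// PIDs become {1, 3, 3}; the second pass then sorts each constant-PID segment
// of ColIndices ascending, so the remote tail ends up as {42, 5, 17} (the PID-1
// GID first, then the PID-3 GIDs in ascending order).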
1192 
1193 
1194  // Build permute array for *local* reindexing.
1195 
1196  // Now fill front end. Two cases:
1197  //
1198  // (1) If the number of Local column GIDs is the same as the number
1199  // of Local domain GIDs, we can simply read the domain GIDs into
1200  // the front part of ColIndices, otherwise
1201  //
1202  // (2) We step through the GIDs of the domainMap, checking to see if
1203  // each domain GID is a column GID. we want to do this to
1204  // maintain a consistent ordering of GIDs between the columns
1205  // and the domain.
1206  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1207  if (NumLocalColGIDs > 0) {
1208  // Load Global Indices into first numMyCols elements column GID list
1209  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1210  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1211  });
1212  }
1213  }
1214  else {
1215  // This part isn't actually tested in the unit tests
1216  LO NumLocalAgain = 0;
1217  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1218  if(final && LocalGIDs_view[i]) {
1219  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1220  }
1221  if(LocalGIDs_view[i]) {
1222  update++;
1223  }
1224  }, NumLocalAgain);
1225 
1226 
1227  TEUCHOS_TEST_FOR_EXCEPTION(
1228  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1229  std::runtime_error, prefix << "Local ID count test failed.");
1230  }
1231 
1232  // Make column Map
1233  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1234 
1235  colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1236  domainMap.getComm ()));
1237 
1238  // Fill out colind_LID using local map
1239  auto localColMap = colMap->getLocalMap();
1240  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1241  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1242  });
1243 
1244  // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
1245  // When colind_LID becomes a Kokkos View we can delete this
1246  Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
1247 }
1248 
1249 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1250 void
1251 lowCommunicationMakeColMapAndReindex (
1252  const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
1253  const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
1254  const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
1255  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
1256  const Kokkos::View<int*,typename Node::device_type> owningPIDs_view,
1257  Teuchos::Array<int> &remotePIDs,
1258  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
1259 {
1260  using Teuchos::rcp;
1261  typedef LocalOrdinal LO;
1262  typedef GlobalOrdinal GO;
1263  typedef Tpetra::global_size_t GST;
1264  typedef Tpetra::Map<LO, GO, Node> map_type;
1265  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1266 
1267  typedef typename Node::device_type DT;
1268  using execution_space = typename DT::execution_space;
1269  execution_space exec;
1270  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1271  typedef typename map_type::local_map_type local_map_type;
1272 
1273  // The domainMap is an RCP because there is a shortcut for a
1274  // (common) special case to return the columnMap = domainMap.
1275  const map_type& domainMap = *domainMapRCP;
1276 
1277  Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID_view.size());
1278  Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID_view.size());
1279 
1280  const size_t numMyRows = rowptr_view.size () - 1;
1281  local_map_type domainMap_local = domainMap.getLocalMap();
1282 
1283  const size_t numDomainElements = domainMap.getLocalNumElements ();
1284  Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1285  auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1286 
1287  size_t NumLocalColGIDs = 0;
1288 
1289  // Scan all column indices and sort into two groups:
1290  // Local: those whose GID matches a GID of the domain map on this processor and
1291  // Remote: All others.
1292  // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1293  Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1294  const int i = member.league_rank();
1295  size_t NumLocalColGIDs_temp = 0;
1296  size_t rowptr_start = rowptr_view[i];
1297  size_t rowptr_end = rowptr_view[i+1];
1298  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1299  const GO GID = colind_GID_view[j];
1300  // Check if GID matches a row GID in local domain map
1301  const LO LID = domainMap_local.getLocalElement (GID);
1302  if (LID != -1) {
1303  auto outcome = LocalGIDs_view_map.insert(LID);
1304  // Fresh insert
1305  if(outcome.success()) {
1306  LocalGIDs_view[LID] = true;
1307  innerUpdate++;
1308  }
1309  }
1310  else {
1311  const int PID = owningPIDs_view[j];
1312  auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1313  if(outcome.success() && PID == -1) {
1314  Kokkos::abort("Cannot figure out if ID is owned.\n");
1315  }
1316  }
1317  }, NumLocalColGIDs_temp);
1318  if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1319  }, NumLocalColGIDs);
1320 
1321  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1322 
1323  Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);
1324 
1325  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1326  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1327 
1328  // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1329  // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1330  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1331  if(final && RemoteGIDs_view_map.valid_at(i)) {
1332  RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1333  PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1334  }
1335  if(RemoteGIDs_view_map.valid_at(i)) {
1336  update += 1;
1337  }
1338  });
1339 
1340  // Possible short-circuit: If all domain map GIDs are present as
1341  // column indices, then set ColMap=domainMap and quit.
1342  if (domainMap.getComm ()->getSize () == 1) {
1343  // Sanity check: When there is only one process, there can be no
1344  // remoteGIDs.
1345  TEUCHOS_TEST_FOR_EXCEPTION(
1346  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1347  "process in the domain Map's communicator, which means that there are no "
1348  "\"remote\" indices. Nevertheless, some column indices are not in the "
1349  "domain Map.");
1350  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1351  // In this case, we just use the domainMap's indices, which is,
1352  // not coincidentally, what we clobbered colind with up above
1353  // anyway. No further reindexing is needed.
1354  colMap = domainMapRCP;
1355 
1356  // Fill out local colMap (which should only contain local GIDs)
1357  auto localColMap = colMap->getLocalMap();
1358  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1359  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1360  });
1361  return;
1362  }
1363  }
1364 
1365  // Now build the array containing column GIDs
1366  // Build back end, containing remote GIDs, first
1367  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1368  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1369 
1370  // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1371  if(NumRemoteColGIDs > 0) {
1372  if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1373  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1374  ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1375  });
1376  }
1377 
1378  // Find the largest PID for bin sorting purposes
1379  int PID_max = 0;
1380  Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
1381  if(max < PIDList_view[i]) max = PIDList_view[i];
1382  }, Kokkos::Max<int>(PID_max));
1383 
1384  using KeyViewTypePID = decltype(PIDList_view);
1385  using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1386 
1387  // Make a subview of ColIndices for remote GID sorting
1388  auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1389 
1390  // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1391  BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1392 
1393  // Sort External column indices so that all columns coming from a
1394  // given remote processor are contiguous. This is a sort with one
1395  // auxiliary array: RemoteColIndices
1396  Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1397  bin_sort2.create_permute_vector(exec);
1398  bin_sort2.sort(exec, PIDList_view);
1399  bin_sort2.sort(exec, ColIndices_subview);
1400 
1401  // Deep copy back from device to host
1402  // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1403  Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1404  Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
1405  Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1406  exec.fence();
1407 
1408  remotePIDs = PIDList;
1409 
1410  // Sort external column indices so that columns from a given remote
1411  // processor are not only contiguous but also in ascending
1412  // order. NOTE: I don't know if the number of externals associated
1413  // with a given remote processor is known at this point ... so I
1414  // count them here.
1415  LO StartCurrent = 0, StartNext = 1;
1416  while(StartNext < NumRemoteColGIDs) {
1417  if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1418  StartNext++;
1419  }
1420  else {
1421  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1422  StartCurrent = StartNext;
1423  StartNext++;
1424  }
1425  }
1426 
1427  Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1428  }
1429 
1430 
1431  // Build permute array for *local* reindexing.
1432 
1433  // Now fill front end. Two cases:
1434  //
1435  // (1) If the number of Local column GIDs is the same as the number
1436  // of Local domain GIDs, we can simply read the domain GIDs into
1437  // the front part of ColIndices, otherwise
1438  //
1439  // (2) We step through the GIDs of the domainMap, checking to see if
1440  // each domain GID is a column GID. we want to do this to
1441  // maintain a consistent ordering of GIDs between the columns
1442  // and the domain.
1443  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1444  if (NumLocalColGIDs > 0) {
1445  // Load the domain Map's global indices into the first NumLocalColGIDs entries of the column GID list
1446  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1447  ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1448  });
1449  }
1450  }
1451  else {
1452  // This part isn't actually tested in the unit tests
1453  LO NumLocalAgain = 0;
1454  Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1455  if(final && LocalGIDs_view[i]) {
1456  ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1457  }
1458  if(LocalGIDs_view[i]) {
1459  update++;
1460  }
1461  }, NumLocalAgain);
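  // The scan above is a parallel stream compaction: LocalGIDs_view flags which
  // domain-Map entries are also column GIDs, and the exclusive running count
  // ("update") gives each flagged GID its packed position at the front of
  // ColIndices_view. NumLocalAgain receives the total number of flagged GIDs.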
1462 
1463 
1464  TEUCHOS_TEST_FOR_EXCEPTION(
1465  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1466  std::runtime_error, prefix << "Local ID count test failed.");
1467  }
1468 
1469  // Make column Map
1470  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1471 
1472  colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1473  domainMap.getComm ()));
1474 
1475  // Fill out colind_LID using local map
1476  auto localColMap = colMap->getLocalMap();
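  // getLocalMap() returns a lightweight, device-compatible local Map, so the
  // GID-to-LID translation below can run inside a Kokkos kernel.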
1477  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1478  colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1479  });
1480 }
1481 
1482 // Generates a list of owning PIDs based on two transfers (aka Import/Export objects)
1483 // Let:
1484 // OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1485 // MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1486 // MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1487 // VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1488 // Precondition:
1489 // 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
1490 // 2) VectorMap->isSameAs(*owningPIDs.getMap()) - map compatibility between transfer & vector
1491 // 3) OwningMap->isOneToOne() - owning map is 1-to-1
1492 // --- Precondition 3 is only checked in DEBUG mode ---
1493 // Postcondition:
1494 // owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
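//
// Illustrative usage sketch (hypothetical objects, forward mode for both
// transfers; not part of this file, and LO/GO/NT are placeholder typedefs):
// given an Import rowImporter whose source Map is one-to-one and an Import
// colImporter whose source Map matches rowImporter's target Map, one could
// write
//
//   Tpetra::Vector<int,LO,GO,NT> owningPIDs(colImporter->getTargetMap());
//   Tpetra::Import_Util::getTwoTransferOwnershipVector(*rowImporter, false,
//                                                      *colImporter, false,
//                                                      owningPIDs);
//
// Afterwards owningPIDs[k] holds the rank that owns, in rowImporter's source
// Map, the GID that has local index k in colImporter's target Map.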
1495 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1496 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1497  bool useReverseModeForOwnership,
1498  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1499  bool useReverseModeForMigration,
1500  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node>& owningPIDs) {
1501  typedef Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
1502  typedef Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
1503 
1504  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > OwningMap = useReverseModeForOwnership ?
1505  transferThatDefinesOwnership.getTargetMap() :
1506  transferThatDefinesOwnership.getSourceMap();
1507  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAo = useReverseModeForOwnership ?
1508  transferThatDefinesOwnership.getSourceMap() :
1509  transferThatDefinesOwnership.getTargetMap();
1510  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAm = useReverseModeForMigration ?
1511  transferThatDefinesMigration.getTargetMap() :
1512  transferThatDefinesMigration.getSourceMap();
1513  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > VectorMap = useReverseModeForMigration ?
1514  transferThatDefinesMigration.getSourceMap() :
1515  transferThatDefinesMigration.getTargetMap();
1516 
1517  TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1518  TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1519 #ifdef HAVE_TPETRA_DEBUG
1520  TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1521 #endif
1522 
1523  int rank = OwningMap->getComm()->getRank();
1524  // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1525  // Note: Due to the 1-to-1 requirement, several of these options throw
1526  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> temp(MapAo);
1527  const import_type* ownAsImport = dynamic_cast<const import_type*> (&transferThatDefinesOwnership);
1528  const export_type* ownAsExport = dynamic_cast<const export_type*> (&transferThatDefinesOwnership);
1529 
1530  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1531  Teuchos::ArrayView<int> v_pids = pids();
1532  if(ownAsImport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1533  else if(ownAsImport && !useReverseModeForOwnership) getPids(*ownAsImport,v_pids,false);
1534  else if(ownAsExport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");}
1535  else {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1536 
1537  const import_type* xferAsImport = dynamic_cast<const import_type*> (&transferThatDefinesMigration);
1538  const export_type* xferAsExport = dynamic_cast<const export_type*> (&transferThatDefinesMigration);
1539  TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1540 
1541  // Migrate from "A" vector to output vector
1542  owningPIDs.putScalar(rank);
1543  if(xferAsImport && useReverseModeForMigration) owningPIDs.doExport(temp,*xferAsImport,Tpetra::REPLACE);
1544  else if(xferAsImport && !useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsImport,Tpetra::REPLACE);
1545  else if(xferAsExport && useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsExport,Tpetra::REPLACE);
1546  else owningPIDs.doExport(temp,*xferAsExport,Tpetra::REPLACE);
1547 
1548 }
1549 
1550 
1551 
1552 } // namespace Import_Util
1553 } // namespace Tpetra
1554 
1555 #endif // TPETRA_IMPORT_UTIL2_HPP