Tpetra_Import_Util2.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_IMPORT_UTIL2_HPP
43 #define TPETRA_IMPORT_UTIL2_HPP
44 
49 
50 #include "Tpetra_ConfigDefs.hpp"
51 #include "Tpetra_Import.hpp"
52 #include "Tpetra_HashTable.hpp"
53 #include "Tpetra_Map.hpp"
54 #include "Tpetra_Util.hpp"
55 #include "Tpetra_Distributor.hpp"
56 #include "Tpetra_CrsMatrix_decl.hpp"
57 #include "Tpetra_Details_reallocDualViewIfNeeded.hpp"
58 #include "Tpetra_Vector.hpp"
59 #include "Kokkos_DualView.hpp"
60 #include <Teuchos_Array.hpp>
61 #include <utility>
62 #include <set>
63 
65 
66 namespace Tpetra {
67 namespace Import_Util {
68 
71 template<typename Scalar, typename Ordinal>
72 void
73 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
74  const Teuchos::ArrayView<Ordinal>& CRS_colind,
75  const Teuchos::ArrayView<Scalar>&CRS_vals);
76 
77 template<typename Ordinal>
78 void
79 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
80  const Teuchos::ArrayView<Ordinal>& CRS_colind);
81 
82 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
83 void
84 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
85  const colind_array_type& CRS_colind,
86  const vals_array_type& CRS_vals);
87 
88 template<typename rowptr_array_type, typename colind_array_type>
89 void
90 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
91  const colind_array_type& CRS_colind);
92 
97 template<typename Scalar, typename Ordinal>
98 void
99 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
100  const Teuchos::ArrayView<Ordinal>& CRS_colind,
101  const Teuchos::ArrayView<Scalar>& CRS_vals);
102 
103 template<typename Ordinal>
104 void
105 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
106  const Teuchos::ArrayView<Ordinal>& CRS_colind);
107 
123 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
124 void
125 lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowPointers,
126  const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
127  const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
128  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & domainMap,
129  const Teuchos::ArrayView<const int> &owningPids,
130  Teuchos::Array<int> &remotePids,
131  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
132 
133 
134 
135 
149  template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
150  void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
151  bool useReverseModeForOwnership,
152  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
153  bool useReverseModeForMigration,
 154  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> & owningPIDs);
 155 
156 } // namespace Import_Util
157 } // namespace Tpetra
158 
159 
160 //
161 // Implementations
162 //
163 
164 namespace Tpetra {
165 namespace Import_Util {
166 
167 
168 template<typename PID, typename GlobalOrdinal>
169 bool sort_PID_then_GID(const std::pair<PID,GlobalOrdinal> &a,
170  const std::pair<PID,GlobalOrdinal> &b)
171 {
172  if(a.first!=b.first)
173  return (a.first < b.first);
174  return (a.second < b.second);
175 }
176 
177 template<typename PID,
178  typename GlobalOrdinal,
179  typename LocalOrdinal>
180 bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &a,
181  const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &b)
182 {
183  if(a.first!=b.first)
184  return a.first < b.first;
185  else
186  return (a.second.first < b.second.first);
187 }
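
For orientation, here is a minimal sketch of how a comparator such as sort_PID_then_GID is typically used: sorting a list of (PID, GID) pairs so that entries are grouped by owning process and ordered by global index within each process. The pair values below are made up for illustration, and <vector> and <algorithm> are assumed to be included by the caller.

  // Illustrative only: sort (PID,GID) pairs by PID, then by GID.
  std::vector<std::pair<int, long long> > pairs =
    { {2, 11}, {0, 42}, {2, 3}, {0, 7} };
  std::sort (pairs.begin (), pairs.end (),
             Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
  // pairs is now (0,7), (0,42), (2,3), (2,11).
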
188 
189 template<typename Scalar,
190  typename LocalOrdinal,
191  typename GlobalOrdinal,
192  typename Node>
193 void
194 reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
195  const Teuchos::ArrayRCP<const size_t> & rowptr,
 196  const Teuchos::ArrayRCP<const LocalOrdinal> & colind,
 197  const Tpetra::Details::Transfer<LocalOrdinal,GlobalOrdinal,Node>& RowTransfer,
198  Teuchos::RCP<const Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > MyImporter,
199  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MyDomainMap,
200  Teuchos::ArrayRCP<int>& type3PIDs,
201  Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
202  Teuchos::RCP<const Teuchos::Comm<int> >& rcomm)
203 {
204 #ifdef HAVE_TPETRACORE_MPI
205  using Teuchos::TimeMonitor;
206  using ::Tpetra::Details::Behavior;
207  using Kokkos::AllowPadding;
208  using Kokkos::view_alloc;
209  using Kokkos::WithoutInitializing;
210  typedef LocalOrdinal LO;
211  typedef GlobalOrdinal GO;
212  typedef std::pair<GO,GO> pidgidpair_t;
213  using Teuchos::RCP;
214  const std::string prefix {" Import_Util2::ReverseND:: "};
215  const std::string label ("IU2::Neighbor");
216 
217  // There can be no neighbor discovery if you don't have an importer
218  if(MyImporter.is_null()) return;
219 
220  std::ostringstream errstr;
221  bool error = false;
222  auto const comm = MyDomainMap->getComm();
223 
224  MPI_Comm rawComm = getRawMpiComm(*comm);
225  const int MyPID = rcomm->getRank ();
226 
227  // Things related to messages I am sending in forward mode (RowTransfer)
228  // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
229  auto ExportPIDs = RowTransfer.getExportPIDs();
230  auto ExportLIDs = RowTransfer.getExportLIDs();
231  auto NumExportLIDs = RowTransfer.getNumExportIDs();
232 
233  Distributor & Distor = MyImporter->getDistributor();
234  const size_t NumRecvs = Distor.getNumReceives();
235  const size_t NumSends = Distor.getNumSends();
236  auto RemoteLIDs = MyImporter->getRemoteLIDs();
237  auto const ProcsFrom = Distor.getProcsFrom();
238  auto const ProcsTo = Distor.getProcsTo();
239 
240  auto LengthsFrom = Distor.getLengthsFrom();
241  auto MyColMap = SourceMatrix.getColMap();
242  const size_t numCols = MyColMap->getNodeNumElements ();
243  RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > target = MyImporter->getTargetMap();
244 
 245  // Get the owning PIDs in a special way:
 246  // ProcsFrom[RemotePIDOrder[lid]] is the proc that owns remote column lid.
247  Teuchos::Array<int> RemotePIDOrder(numCols,-1);
248 
 249  // For each remote LID, record the index into ProcsFrom of the process that owns it.
250  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
251  for (size_t k = 0; k < LengthsFrom[i]; ++k) {
252  const int pid = ProcsFrom[i];
253  if (pid != MyPID) {
254  RemotePIDOrder[RemoteLIDs[j]] = i;
255  }
256  j++;
257  }
258  }
259 
 260  // Step One: Start collecting the (PID,GID) pairs in the std::sets
261  //
262  // For each index in ProcsFrom, we will insert into a set of (PID,
263  // GID) pairs, in order to build a list of such pairs for each of
264  // those processes. Since this is building a reverse, we will send
265  // to these processes.
266  Teuchos::Array<int> ReverseSendSizes(NumRecvs,0);
 267  // build each entry from a raw C array to avoid Teuchos::Array value-initializing all of the reserved memory
268  Teuchos::Array< Teuchos::ArrayRCP<pidgidpair_t > > RSB(NumRecvs);
269 
270  {
271 #ifdef HAVE_TPETRA_MMM_TIMINGS
272  TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
273 #endif
274 
275  // 25 Jul 2018: CBL
276  // todo:std::unordered_set (hash table),
277  // with an adequate prereservation ("bucket count").
 278  // An unordered_set has to have a custom hasher for the pid/gid pair.
 279  // However, when pidsets is copied to RSB, it will be in key
 280  // order _not_ in pid,gid order. (unlike std::set).
 281  // Implement this with a reserve, and time BOTH building pidsets
 282  // _and_ the sort after the receive. Even if unordered_set saves
 283  // time, if it causes the sort to be longer, it's not a win.
284 
285  Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
286  {
287 #ifdef HAVE_TPETRA_MMM_TIMINGS
288  TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
289 #endif
290  for(size_t i=0; i < NumExportLIDs; i++) {
291  LO lid = ExportLIDs[i];
292  GO exp_pid = ExportPIDs[i];
293  for(auto j=rowptr[lid]; j<rowptr[lid+1]; j++){
294  int pid_order = RemotePIDOrder[colind[j]];
295  if(pid_order!=-1) {
296  GO gid = MyColMap->getGlobalElement(colind[j]); //Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
297  auto tpair = pidgidpair_t(exp_pid,gid);
298  pidsets[pid_order].insert(pidsets[pid_order].end(),tpair);
299  }
300  }
301  }
302  }
303 
304  {
305 #ifdef HAVE_TPETRA_MMM_TIMINGS
306  TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
307 #endif
308  int jj = 0;
309  for(auto &&ps : pidsets) {
310  auto s = ps.size();
311  RSB[jj] = Teuchos::arcp(new pidgidpair_t[ s ],0, s ,true);
312  std::copy(ps.begin(),ps.end(),RSB[jj]);
313  ReverseSendSizes[jj]=s;
314  ++jj;
315  }
316  }
317  } // end of set based packing.
318 
319  Teuchos::Array<int> ReverseRecvSizes(NumSends,-1);
320  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size()+ProcsTo.size(), MPI_REQUEST_NULL);
321  // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
322  const int mpi_tag_base_ = 3;
323 
324  int mpireq_idx=0;
325  for(int i=0;i<ProcsTo.size();++i) {
326  int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
327  int * thisrecv = (int *) (&ReverseRecvSizes[i]);
328  MPI_Request rawRequest = MPI_REQUEST_NULL;
329  MPI_Irecv(const_cast<int*>(thisrecv),
330  1,
331  MPI_INT,
332  ProcsTo[i],
333  Rec_Tag,
334  rawComm,
335  &rawRequest);
336  rawBreq[mpireq_idx++]=rawRequest;
337  }
338  for(int i=0;i<ProcsFrom.size();++i) {
339  int Send_Tag = mpi_tag_base_ + MyPID;
340  int * mysend = ( int *)(&ReverseSendSizes[i]);
341  MPI_Request rawRequest = MPI_REQUEST_NULL;
342  MPI_Isend(mysend,
343  1,
344  MPI_INT,
345  ProcsFrom[i],
346  Send_Tag,
347  rawComm,
348  &rawRequest);
349  rawBreq[mpireq_idx++]=rawRequest;
350  }
351  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
352 #ifdef HAVE_TPETRA_DEBUG
353  const int err1 =
354 #endif
355  MPI_Waitall (rawBreq.size(), rawBreq.getRawPtr(),
356  rawBstatus.getRawPtr());
357 
358 
359 #ifdef HAVE_TPETRA_DEBUG
360  if(err1) {
361  errstr <<MyPID<< "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
362  error=true;
363  std::cerr<<errstr.str()<<std::flush;
364  }
365 #endif
366 
367  int totalexportpairrecsize = 0;
368  for (size_t i = 0; i < NumSends; ++i) {
369  totalexportpairrecsize += ReverseRecvSizes[i];
370 #ifdef HAVE_TPETRA_DEBUG
371  if(ReverseRecvSizes[i]<0) {
372  errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size "<<ReverseRecvSizes[i]<<std::endl;
373  error=true;
374  }
375 #endif
376  }
377  Teuchos::ArrayRCP<pidgidpair_t >AllReverseRecv= Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize],0,totalexportpairrecsize,true);
378  int offset = 0;
379  mpireq_idx=0;
380  for(int i=0;i<ProcsTo.size();++i) {
381  int recv_data_size = ReverseRecvSizes[i]*2;
382  int recvData_MPI_Tag = mpi_tag_base_*2 + ProcsTo[i];
383  MPI_Request rawRequest = MPI_REQUEST_NULL;
384  GO * rec_bptr = (GO*) (&AllReverseRecv[offset]);
385  offset+=ReverseRecvSizes[i];
386  MPI_Irecv(rec_bptr,
387  recv_data_size,
388  ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
389  ProcsTo[i],
390  recvData_MPI_Tag,
391  rawComm,
392  &rawRequest);
393  rawBreq[mpireq_idx++]=rawRequest;
394  }
395  for(int ii=0;ii<ProcsFrom.size();++ii) {
396  GO * send_bptr = (GO*) (RSB[ii].getRawPtr());
397  MPI_Request rawSequest = MPI_REQUEST_NULL;
398  int send_data_size = ReverseSendSizes[ii]*2; // 2 == count of pair
399  int sendData_MPI_Tag = mpi_tag_base_*2+MyPID;
400  MPI_Isend(send_bptr,
401  send_data_size,
402  ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
403  ProcsFrom[ii],
404  sendData_MPI_Tag,
405  rawComm,
406  &rawSequest);
407 
408  rawBreq[mpireq_idx++]=rawSequest;
409  }
410 #ifdef HAVE_TPETRA_DEBUG
411  const int err =
412 #endif
413  MPI_Waitall (rawBreq.size(),
414  rawBreq.getRawPtr(),
415  rawBstatus.getRawPtr());
416 #ifdef HAVE_TPETRA_DEBUG
417  if(err) {
418  errstr <<MyPID<< "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
419  error=true;
420  std::cerr<<errstr.str()<<std::flush;
421  }
422 #endif
423  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
424 
425  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
426 // don't resize to remove non-unique, just use the end-of-unique iterator
427  if(AllReverseRecv.begin() == newEndOfPairs) return;
428  int ARRsize = std::distance(AllReverseRecv.begin(),newEndOfPairs);
429  auto rPIDs = Teuchos::arcp(new int[ARRsize],0,ARRsize,true);
430  auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize],0,ARRsize,true);
431 
432  int tsize=0;
433  for(auto itr = AllReverseRecv.begin(); itr!=newEndOfPairs; ++itr ) {
434  if((int)(itr->first) != MyPID) {
435  rPIDs[tsize]=(int)itr->first;
436  LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
437  rLIDs[tsize]=lid;
438  tsize++;
439  }
440  }
441 
442  type3PIDs=rPIDs.persistingView(0,tsize);
443  type3LIDs=rLIDs.persistingView(0,tsize);
444 
445  if(error){
446  std::cerr<<errstr.str()<<std::flush;
447  comm->barrier();
448  comm->barrier();
449  comm->barrier();
450  MPI_Abort (MPI_COMM_WORLD, -1);
451  }
452 #endif
453 }
454 
455 // Note: This should get merged with the other Tpetra sort routines eventually.
456 template<typename Scalar, typename Ordinal>
457 void
458 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
459  const Teuchos::ArrayView<Ordinal> & CRS_colind,
460  const Teuchos::ArrayView<Scalar> &CRS_vals)
461 {
462  // For each row, sort column entries from smallest to largest.
 463  // Use shell sort, which is fast if the indices are already nearly sorted.
464  // Code copied from Epetra_CrsMatrix::SortEntries()
465  size_t NumRows = CRS_rowptr.size()-1;
466  size_t nnz = CRS_colind.size();
467 
468  const bool permute_values_array = CRS_vals.size() > 0;
469 
470  for(size_t i = 0; i < NumRows; i++){
471  size_t start=CRS_rowptr[i];
472  if(start >= nnz) continue;
473 
474  size_t NumEntries = CRS_rowptr[i+1] - start;
475  Teuchos::ArrayRCP<Scalar> locValues;
476  if (permute_values_array)
477  locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
478  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
479 
480  Ordinal n = NumEntries;
481  Ordinal m = 1;
482  while (m<n) m = m*3+1;
483  m /= 3;
484 
485  while(m > 0) {
486  Ordinal max = n - m;
487  for(Ordinal j = 0; j < max; j++) {
488  for(Ordinal k = j; k >= 0; k-=m) {
489  if(locIndices[k+m] >= locIndices[k])
490  break;
491  if (permute_values_array) {
492  Scalar dtemp = locValues[k+m];
493  locValues[k+m] = locValues[k];
494  locValues[k] = dtemp;
495  }
496  Ordinal itemp = locIndices[k+m];
497  locIndices[k+m] = locIndices[k];
498  locIndices[k] = itemp;
499  }
500  }
501  m = m/3;
502  }
503  }
504 }
505 
506 template<typename Ordinal>
507 void
508 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
509  const Teuchos::ArrayView<Ordinal> & CRS_colind)
510 {
511  // Generate dummy values array
512  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
513  sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
514 }
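
A minimal usage sketch for the Teuchos::ArrayView overloads above, using a hypothetical two-row CRS structure held in std::vectors; the column indices of each row are sorted in place and the values are permuted along with them.

  // Illustrative only: two rows of three entries each, columns unsorted.
  std::vector<size_t> rowptr = {0, 3, 6};
  std::vector<int>    colind = {4, 1, 2,  5, 0, 3};
  std::vector<double> vals   = {4.0, 1.0, 2.0,  5.0, 0.0, 3.0};

  Tpetra::Import_Util::sortCrsEntries<double, int> (
    Teuchos::ArrayView<size_t> (rowptr.data (), rowptr.size ()),
    Teuchos::ArrayView<int>    (colind.data (), colind.size ()),
    Teuchos::ArrayView<double> (vals.data (),   vals.size ()));
  // colind is now {1, 2, 4, 0, 3, 5} and vals was permuted the same way.
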
515 
516 namespace Impl {
517 
518 template<class RowOffsetsType, class ColumnIndicesType, class ValuesType>
519 class SortCrsEntries {
520 private:
521  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
522  typedef typename ValuesType::non_const_value_type scalar_type;
523 
524 public:
525  SortCrsEntries (const RowOffsetsType& ptr,
526  const ColumnIndicesType& ind,
527  const ValuesType& val) :
528  ptr_ (ptr),
529  ind_ (ind),
530  val_ (val)
531  {
532  static_assert (std::is_signed<ordinal_type>::value, "The type of each "
533  "column index -- that is, the type of each entry of ind "
534  "-- must be signed in order for this functor to work.");
535  }
536 
537  KOKKOS_FUNCTION void operator() (const size_t i) const
538  {
539  const size_t nnz = ind_.extent (0);
540  const size_t start = ptr_(i);
541  const bool permute_values_array = val_.extent(0) > 0;
542 
543  if (start < nnz) {
544  const size_t NumEntries = ptr_(i+1) - start;
545 
546  const ordinal_type n = static_cast<ordinal_type> (NumEntries);
547  ordinal_type m = 1;
548  while (m<n) m = m*3+1;
549  m /= 3;
550 
551  while (m > 0) {
552  ordinal_type max = n - m;
553  for (ordinal_type j = 0; j < max; j++) {
554  for (ordinal_type k = j; k >= 0; k -= m) {
555  const size_t sk = start+k;
556  if (ind_(sk+m) >= ind_(sk)) {
557  break;
558  }
559  if (permute_values_array) {
560  const scalar_type dtemp = val_(sk+m);
561  val_(sk+m) = val_(sk);
562  val_(sk) = dtemp;
563  }
564  const ordinal_type itemp = ind_(sk+m);
565  ind_(sk+m) = ind_(sk);
566  ind_(sk) = itemp;
567  }
568  }
569  m = m/3;
570  }
571  }
572  }
573 
574  static void
575  sortCrsEntries (const RowOffsetsType& ptr,
576  const ColumnIndicesType& ind,
577  const ValuesType& val)
578  {
579  // For each row, sort column entries from smallest to largest.
 580  // Use shell sort, which is fast if the indices are already nearly sorted.
581  // Code copied from Epetra_CrsMatrix::SortEntries()
582  // NOTE: This should not be taken as a particularly efficient way to sort
583  // rows of matrices in parallel. But it is correct, so that's something.
584  if (ptr.extent (0) == 0) {
585  return; // no rows, so nothing to sort
586  }
587  const size_t NumRows = ptr.extent (0) - 1;
588 
589  typedef size_t index_type; // what this function was using; not my choice
590  typedef typename ValuesType::execution_space execution_space;
591  // Specify RangePolicy explicitly, in order to use correct execution
592  // space. See #1345.
593  typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
594  Kokkos::parallel_for ("sortCrsEntries", range_type (0, NumRows),
595  SortCrsEntries (ptr, ind, val));
596  }
597 
598 private:
599  RowOffsetsType ptr_;
600  ColumnIndicesType ind_;
601  ValuesType val_;
602 };
603 
604 } // namespace Impl
605 
606 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
607 void
608 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
609  const colind_array_type& CRS_colind,
610  const vals_array_type& CRS_vals)
611 {
612  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
613  vals_array_type>::sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
614 }
615 
616 template<typename rowptr_array_type, typename colind_array_type>
617 void
618 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
619  const colind_array_type& CRS_colind)
620 {
621  // Generate dummy values array
622  typedef typename colind_array_type::execution_space execution_space;
623  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
624  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
625  scalar_view_type CRS_vals;
626  sortCrsEntries<rowptr_array_type, colind_array_type,
627  scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
628 }
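
A sketch of the Kokkos::View overload, assuming Kokkos has already been initialized (for example via Tpetra::ScopeGuard) and that numRows and numEntries are hypothetical sizes supplied by the caller. Filling the views (typically through host mirrors and Kokkos::deep_copy) is elided.

  // Illustrative only: sort a CRS structure stored in Kokkos::Views.
  using device_type = Kokkos::DefaultExecutionSpace::device_type;
  Kokkos::View<size_t*, device_type> rowptr ("rowptr", numRows + 1);
  Kokkos::View<int*,    device_type> colind ("colind", numEntries);
  Kokkos::View<double*, device_type> vals   ("vals",   numEntries);
  // ... fill rowptr, colind, and vals ...
  Tpetra::Import_Util::sortCrsEntries (rowptr, colind, vals);
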
629 
630 // Note: This should get merged with the other Tpetra sort routines eventually.
631 template<typename Scalar, typename Ordinal>
632 void
633 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
634  const Teuchos::ArrayView<Ordinal> & CRS_colind,
635  const Teuchos::ArrayView<Scalar> &CRS_vals)
636 {
 637  // For each row, sort column entries from smallest to largest,
 638  // merging identical column ids by adding their values. Use shell
 639  // sort, which is fast if the indices are already nearly sorted.
 640  // Code copied from Epetra_CrsMatrix::SortEntries()
641 
642  if (CRS_rowptr.size () == 0) {
643  return; // no rows, so nothing to sort
644  }
645  const size_t NumRows = CRS_rowptr.size () - 1;
646  const size_t nnz = CRS_colind.size ();
647  size_t new_curr = CRS_rowptr[0];
648  size_t old_curr = CRS_rowptr[0];
649 
650  const bool permute_values_array = CRS_vals.size() > 0;
651 
652  for(size_t i = 0; i < NumRows; i++){
653  const size_t old_rowptr_i=CRS_rowptr[i];
654  CRS_rowptr[i] = old_curr;
655  if(old_rowptr_i >= nnz) continue;
656 
657  size_t NumEntries = CRS_rowptr[i+1] - old_rowptr_i;
658  Teuchos::ArrayRCP<Scalar> locValues;
659  if (permute_values_array)
660  locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
661  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
662 
663  // Sort phase
664  Ordinal n = NumEntries;
665  Ordinal m = n/2;
666 
667  while(m > 0) {
668  Ordinal max = n - m;
669  for(Ordinal j = 0; j < max; j++) {
670  for(Ordinal k = j; k >= 0; k-=m) {
671  if(locIndices[k+m] >= locIndices[k])
672  break;
673  if (permute_values_array) {
674  Scalar dtemp = locValues[k+m];
675  locValues[k+m] = locValues[k];
676  locValues[k] = dtemp;
677  }
678  Ordinal itemp = locIndices[k+m];
679  locIndices[k+m] = locIndices[k];
680  locIndices[k] = itemp;
681  }
682  }
683  m = m/2;
684  }
685 
686  // Merge & shrink
687  for(size_t j=old_rowptr_i; j < CRS_rowptr[i+1]; j++) {
688  if(j > old_rowptr_i && CRS_colind[j]==CRS_colind[new_curr-1]) {
689  if (permute_values_array) CRS_vals[new_curr-1] += CRS_vals[j];
690  }
691  else if(new_curr==j) {
692  new_curr++;
693  }
694  else {
695  CRS_colind[new_curr] = CRS_colind[j];
696  if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
697  new_curr++;
698  }
699  }
700  old_curr=new_curr;
701  }
702 
703  CRS_rowptr[NumRows] = new_curr;
704 }
705 
706 template<typename Ordinal>
707 void
708 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
709  const Teuchos::ArrayView<Ordinal> & CRS_colind)
710 {
711  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
712  return sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
713 }
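
A small worked sketch of the merge behavior, with a single hypothetical row that repeats column index 2: after the call the duplicates have been combined by adding their values, and the row pointer has shrunk to reflect the merged length. Entries past the new rowptr[1] are leftover scratch and should be ignored.

  // Illustrative only: one row with a duplicated column index (2).
  std::vector<size_t> rowptr = {0, 4};
  std::vector<int>    colind = {2, 0, 2, 1};
  std::vector<double> vals   = {10.0, 1.0, 30.0, 2.0};

  Tpetra::Import_Util::sortAndMergeCrsEntries<double, int> (
    Teuchos::ArrayView<size_t> (rowptr.data (), rowptr.size ()),
    Teuchos::ArrayView<int>    (colind.data (), colind.size ()),
    Teuchos::ArrayView<double> (vals.data (),   vals.size ()));
  // rowptr is now {0, 3}; the first three entries of colind are {0, 1, 2}
  // with vals {1.0, 2.0, 40.0}.
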
714 
715 
716 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
717 void
718 lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowptr,
719  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
720  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
721  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
722  const Teuchos::ArrayView<const int> &owningPIDs,
723  Teuchos::Array<int> &remotePIDs,
724  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
725 {
726  using Teuchos::rcp;
727  typedef LocalOrdinal LO;
728  typedef GlobalOrdinal GO;
729  typedef Tpetra::global_size_t GST;
730  typedef Tpetra::Map<LO, GO, Node> map_type;
731  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
732 
733  // The domainMap is an RCP because there is a shortcut for a
734  // (common) special case to return the columnMap = domainMap.
735  const map_type& domainMap = *domainMapRCP;
736 
737  // Scan all column indices and sort into two groups:
738  // Local: those whose GID matches a GID of the domain map on this processor and
739  // Remote: All others.
740  const size_t numDomainElements = domainMap.getNodeNumElements ();
741  Teuchos::Array<bool> LocalGIDs;
742  if (numDomainElements > 0) {
743  LocalGIDs.resize (numDomainElements, false); // Assume domain GIDs are not local
744  }
745 
 746  // In principle it is good to have RemoteGIDs and RemoteGIDList be as
747  // long as the number of remote GIDs on this processor, but this
748  // would require two passes through the column IDs, so we make it
749  // the max of 100 and the number of block rows.
750  //
751  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
752  // most INT_MAX entries, but it's possible to have more rows than
753  // that (if size_t is 64 bits and int is 32 bits).
754  const size_t numMyRows = rowptr.size () - 1;
755  const int hashsize = std::max (static_cast<int> (numMyRows), 100);
756 
757  Tpetra::Details::HashTable<GO, LO> RemoteGIDs (hashsize);
758  Teuchos::Array<GO> RemoteGIDList;
759  RemoteGIDList.reserve (hashsize);
760  Teuchos::Array<int> PIDList;
761  PIDList.reserve (hashsize);
762 
763  // Here we start using the *LocalOrdinal* colind_LID array. This is
764  // safe even if both columnIndices arrays are actually the same
 765  // (because LocalOrdinal==GO). For *local* GIDs, we set
 766  // colind_LID to their LID in the domainMap. For *remote*
 767  // GIDs, we set colind_LID to (numDomainElements+NumRemoteColGIDs)
768  // before the increment of the remote count. These numberings will
769  // be separate because no local LID is greater than
770  // numDomainElements.
771 
772  size_t NumLocalColGIDs = 0;
773  LO NumRemoteColGIDs = 0;
774  for (size_t i = 0; i < numMyRows; ++i) {
775  for(size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
776  const GO GID = colind_GID[j];
777  // Check if GID matches a row GID
778  const LO LID = domainMap.getLocalElement (GID);
779  if(LID != -1) {
780  const bool alreadyFound = LocalGIDs[LID];
781  if (! alreadyFound) {
782  LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
783  NumLocalColGIDs++;
784  }
785  colind_LID[j] = LID;
786  }
787  else {
788  const LO hash_value = RemoteGIDs.get (GID);
 789  if (hash_value == -1) { // This means it's a new remote GID
790  const int PID = owningPIDs[j];
791  TEUCHOS_TEST_FOR_EXCEPTION(
792  PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
793  "PID is owned.");
794  colind_LID[j] = static_cast<LO> (numDomainElements + NumRemoteColGIDs);
795  RemoteGIDs.add (GID, NumRemoteColGIDs);
796  RemoteGIDList.push_back (GID);
797  PIDList.push_back (PID);
798  NumRemoteColGIDs++;
799  }
800  else {
801  colind_LID[j] = static_cast<LO> (numDomainElements + hash_value);
802  }
803  }
804  }
805  }
806 
807  // Possible short-circuit: If all domain map GIDs are present as
808  // column indices, then set ColMap=domainMap and quit.
809  if (domainMap.getComm ()->getSize () == 1) {
810  // Sanity check: When there is only one process, there can be no
811  // remoteGIDs.
812  TEUCHOS_TEST_FOR_EXCEPTION(
813  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
814  "process in the domain Map's communicator, which means that there are no "
815  "\"remote\" indices. Nevertheless, some column indices are not in the "
816  "domain Map.");
817  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
818  // In this case, we just use the domainMap's indices, which is,
 819  // not coincidentally, what we clobbered colind with up above
820  // anyway. No further reindexing is needed.
821  colMap = domainMapRCP;
822  return;
823  }
824  }
825 
826  // Now build the array containing column GIDs
827  // Build back end, containing remote GIDs, first
828  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
829  Teuchos::Array<GO> ColIndices;
830  GO* RemoteColIndices = NULL;
831  if (numMyCols > 0) {
832  ColIndices.resize (numMyCols);
833  if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
834  RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
835  }
836  }
837 
838  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
839  RemoteColIndices[i] = RemoteGIDList[i];
840  }
841 
842  // Build permute array for *remote* reindexing.
843  Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
844  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
845  RemotePermuteIDs[i]=i;
846  }
847 
848  // Sort External column indices so that all columns coming from a
849  // given remote processor are contiguous. This is a sort with two
 850  // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
851  Tpetra::sort3 (PIDList.begin (), PIDList.end (),
852  ColIndices.begin () + NumLocalColGIDs,
853  RemotePermuteIDs.begin ());
854 
855  // Stash the RemotePIDs.
856  //
857  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
858  // we'd call it here.
859  remotePIDs = PIDList;
860 
861  // Sort external column indices so that columns from a given remote
862  // processor are not only contiguous but also in ascending
863  // order. NOTE: I don't know if the number of externals associated
864  // with a given remote processor is known at this point ... so I
865  // count them here.
866 
867  // NTS: Only sort the RemoteColIndices this time...
868  LO StartCurrent = 0, StartNext = 1;
869  while (StartNext < NumRemoteColGIDs) {
870  if (PIDList[StartNext]==PIDList[StartNext-1]) {
871  StartNext++;
872  }
873  else {
874  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
875  ColIndices.begin () + NumLocalColGIDs + StartNext,
876  RemotePermuteIDs.begin () + StartCurrent);
877  StartCurrent = StartNext;
878  StartNext++;
879  }
880  }
881  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
882  ColIndices.begin () + NumLocalColGIDs + StartNext,
883  RemotePermuteIDs.begin () + StartCurrent);
884 
885  // Reverse the permutation to get the information we actually care about
886  Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
887  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
888  ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
889  }
890 
891  // Build permute array for *local* reindexing.
892  bool use_local_permute = false;
893  Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
894 
895  // Now fill front end. Two cases:
896  //
897  // (1) If the number of Local column GIDs is the same as the number
898  // of Local domain GIDs, we can simply read the domain GIDs into
899  // the front part of ColIndices, otherwise
900  //
901  // (2) We step through the GIDs of the domainMap, checking to see if
 902  // each domain GID is a column GID. We want to do this to
903  // maintain a consistent ordering of GIDs between the columns
904  // and the domain.
905  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getNodeElementList();
906  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
907  if (NumLocalColGIDs > 0) {
 908  // Load the domain Map's global indices into the first NumLocalColGIDs entries of the column GID list
909  std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
910  ColIndices.begin ());
911  }
912  }
913  else {
914  LO NumLocalAgain = 0;
915  use_local_permute = true;
916  for (size_t i = 0; i < numDomainElements; ++i) {
917  if (LocalGIDs[i]) {
918  LocalPermuteIDs[i] = NumLocalAgain;
919  ColIndices[NumLocalAgain++] = domainGlobalElements[i];
920  }
921  }
922  TEUCHOS_TEST_FOR_EXCEPTION(
923  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
924  std::runtime_error, prefix << "Local ID count test failed.");
925  }
926 
927  // Make column Map
928  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
929  colMap = rcp (new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
930  domainMap.getComm ()));
931 
932  // Low-cost reindex of the matrix
933  for (size_t i = 0; i < numMyRows; ++i) {
934  for (size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
935  const LO ID = colind_LID[j];
936  if (static_cast<size_t> (ID) < numDomainElements) {
937  if (use_local_permute) {
938  colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
939  }
940  // In the case where use_local_permute==false, we just copy
 941  // the DomainMap's ordering, which happens to be what we
942  // put in colind_LID to begin with.
943  }
944  else {
945  colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
946  }
947  }
948  }
949 }
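
A rough calling sketch for the function above. It assumes the caller already holds CRS arrays whose column entries are global indices, an owningPIDs array giving the owning rank of each entry (e.g. produced while unpacking an import), and typedefs LO, GO, and Node; all of these names are placeholders.

  // Illustrative only: the Teuchos::Arrays are assumed to be filled elsewhere.
  Teuchos::Array<size_t> rowptr;      // length numLocalRows+1
  Teuchos::Array<LO>     colind_LID;  // length nnz; rewritten to local column ids
  Teuchos::Array<GO>     colind_GID;  // length nnz; global column ids
  Teuchos::Array<int>    owningPIDs;  // length nnz; owning rank of each entry
  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> > domainMap;  // domain Map of the matrix

  Teuchos::Array<int> remotePIDs;
  Teuchos::RCP<const Tpetra::Map<LO,GO,Node> > colMap;
  Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO,GO,Node> (
    rowptr (), colind_LID (), colind_GID (), domainMap, owningPIDs (), remotePIDs, colMap);
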
950 
951 
952 
953 
 954 // Generates a list of owning PIDs based on two transfers (aka Import/Export objects)
955 // Let:
956 // OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
957 // MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
958 // MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
959 // VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
960 // Precondition:
 961 // 1) MapAo->isSameAs(*MapAm) - map compatibility between transfers
 962 // 2) VectorMap->isSameAs(*owningPIDs.getMap()) - map compatibility between transfer & vector
963 // 3) OwningMap->isOneToOne() - owning map is 1-to-1
964 // --- Precondition 3 is only checked in DEBUG mode ---
965 // Postcondition:
966 // owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
967 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
968 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
969  bool useReverseModeForOwnership,
970  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
971  bool useReverseModeForMigration,
 972  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> & owningPIDs) {
 973  typedef Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
 974  typedef Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
 975 
976  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > OwningMap = useReverseModeForOwnership ?
977  transferThatDefinesOwnership.getTargetMap() :
978  transferThatDefinesOwnership.getSourceMap();
979  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAo = useReverseModeForOwnership ?
980  transferThatDefinesOwnership.getSourceMap() :
981  transferThatDefinesOwnership.getTargetMap();
982  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAm = useReverseModeForMigration ?
983  transferThatDefinesMigration.getTargetMap() :
984  transferThatDefinesMigration.getSourceMap();
985  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > VectorMap = useReverseModeForMigration ?
986  transferThatDefinesMigration.getSourceMap() :
987  transferThatDefinesMigration.getTargetMap();
988 
989  TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
990  TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
991 #ifdef HAVE_TPETRA_DEBUG
992  TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
993 #endif
994 
995  int rank = OwningMap->getComm()->getRank();
996  // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
 997  // Note: Due to the 1-to-1 requirement, several of these options throw
 998  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> temp (MapAo);
999  const import_type* ownAsImport = dynamic_cast<const import_type*> (&transferThatDefinesOwnership);
1000  const export_type* ownAsExport = dynamic_cast<const export_type*> (&transferThatDefinesOwnership);
1001 
1002  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1003  Teuchos::ArrayView<int> v_pids = pids();
1004  if(ownAsImport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1005  else if(ownAsImport && !useReverseModeForOwnership) getPids(*ownAsImport,v_pids,false);
1006  else if(ownAsExport && useReverseModeForMigration) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");}
1007  else {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1008 
1009  const import_type* xferAsImport = dynamic_cast<const import_type*> (&transferThatDefinesMigration);
1010  const export_type* xferAsExport = dynamic_cast<const export_type*> (&transferThatDefinesMigration);
1011  TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1012 
1013  // Migrate from "A" vector to output vector
1014  owningPIDs.putScalar(rank);
1015  if(xferAsImport && useReverseModeForMigration) owningPIDs.doExport(temp,*xferAsImport,Tpetra::REPLACE);
1016  else if(xferAsImport && !useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsImport,Tpetra::REPLACE);
1017  else if(xferAsExport && useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsExport,Tpetra::REPLACE);
1018  else owningPIDs.doExport(temp,*xferAsExport,Tpetra::REPLACE);
1019 
1020 }
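
A hedged usage sketch for getTwoTransferOwnershipVector, under these assumptions: rowImport is a forward-mode Import whose one-to-one source Map defines ownership, vecImport is a forward-mode Import whose source Map equals rowImport's target Map (so precondition 1 holds), and owningPIDs is built on vecImport's target Map (so precondition 2 holds). LO, GO, Node, and both Import names are placeholders.

  // Illustrative only.
  Tpetra::Vector<int,LO,GO,Node> owningPIDs (vecImport->getTargetMap ());
  Tpetra::Import_Util::getTwoTransferOwnershipVector (*rowImport, false,
                                                      *vecImport, false,
                                                      owningPIDs);
  // Each local entry of owningPIDs now holds the rank that owns the
  // corresponding GID in rowImport's one-to-one source Map.
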
1021 
1022 
1023 
1024 } // namespace Import_Util
1025 } // namespace Tpetra
1026 
 1027 #endif // TPETRA_IMPORT_UTIL2_HPP