Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPH_DEF_HPP
11 #define TPETRA_CRSGRAPH_DEF_HPP
12 
15 
20 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
21 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
22 #include "Tpetra_Details_makeColMap.hpp"
26 #include "Tpetra_Distributor.hpp"
27 #include "Teuchos_SerialDenseMatrix.hpp"
28 #include "Tpetra_Vector.hpp"
29 #include "Tpetra_Import_Util.hpp"
30 #include "Tpetra_Import_Util2.hpp"
31 #include "Tpetra_Details_packCrsGraph.hpp"
32 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
33 #include "Tpetra_Details_CrsPadding.hpp"
34 #include "Tpetra_Util.hpp"
35 #include <algorithm>
36 #include <limits>
37 #include <map>
38 #include <sstream>
39 #include <string>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
44 namespace Tpetra {
45  namespace Details {
46  namespace Impl {
47 
48  template<class MapIter>
49  void
50  verbosePrintMap(std::ostream& out,
51  MapIter beg,
52  MapIter end,
53  const size_t numEnt,
54  const char mapName[])
55  {
56  using ::Tpetra::Details::Behavior;
58 
59  out << mapName << ": {";
60  const size_t maxNumToPrint =
62  if (maxNumToPrint == 0) {
63  if (numEnt != 0) {
64  out << "...";
65  }
66  }
67  else {
68  const size_t numToPrint = numEnt > maxNumToPrint ?
69  maxNumToPrint : numEnt;
70  size_t count = 0;
71  for (MapIter it = beg; it != end; ++it) {
72  out << "(" << (*it).first << ", ";
73  verbosePrintArray(out, (*it).second, "gblColInds",
74  maxNumToPrint);
75  out << ")";
76  if (count + size_t(1) < numToPrint) {
77  out << ", ";
78  }
79  ++count;
80  }
81  if (count < numEnt) {
82  out << ", ...";
83  }
84  }
85  out << "}";
86  }
87 
88  template<class LO, class GO, class Node>
89  Teuchos::ArrayView<GO>
90  getRowGraphGlobalRow(
91  std::vector<GO>& gblColIndsStorage,
92  const RowGraph<LO, GO, Node>& graph,
93  const GO gblRowInd)
94  {
95  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
96  if (gblColIndsStorage.size() < origNumEnt) {
97  gblColIndsStorage.resize(origNumEnt);
98  }
99  typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
100  origNumEnt);
101  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
102  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
103  return retval;
104  }
105 
106  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
107  class ConvertColumnIndicesFromGlobalToLocal {
108  public:
109  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
110  const ::Kokkos::View<const GO*, DT>& gblColInds,
111  const ::Kokkos::View<const OffsetType*, DT>& ptr,
112  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
113  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
114  lclColInds_ (lclColInds),
115  gblColInds_ (gblColInds),
116  ptr_ (ptr),
117  lclColMap_ (lclColMap),
118  numRowEnt_ (numRowEnt)
119  {}
120 
121  KOKKOS_FUNCTION void
122  operator () (const LO& lclRow, OffsetType& curNumBad) const
123  {
124  const OffsetType offset = ptr_(lclRow);
125  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
126  // of entries in a row to LO, as long as the row doesn't have
127  // too many duplicate entries.
128  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
129  for (LO j = 0; j < numEnt; ++j) {
130  const GO gid = gblColInds_(offset + j);
131  const LO lid = lclColMap_.getLocalElement (gid);
132  lclColInds_(offset + j) = lid;
133  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
134  ++curNumBad;
135  }
136  }
137  }
138 
139  static OffsetType
140  run (const ::Kokkos::View<LO*, DT>& lclColInds,
141  const ::Kokkos::View<const GO*, DT>& gblColInds,
142  const ::Kokkos::View<const OffsetType*, DT>& ptr,
143  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
144  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
145  {
146  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
147  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
148 
149  const LO lclNumRows = ptr.extent (0) == 0 ?
150  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
151  OffsetType numBad = 0;
152  // Count of "bad" column indices is a reduction over rows.
153  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
154  functor_type (lclColInds, gblColInds, ptr,
155  lclColMap, numRowEnt),
156  numBad);
157  return numBad;
158  }
159 
160  private:
161  ::Kokkos::View<LO*, DT> lclColInds_;
162  ::Kokkos::View<const GO*, DT> gblColInds_;
163  ::Kokkos::View<const OffsetType*, DT> ptr_;
165  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
166  };
167 
168  } // namespace Impl
169 
184  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
185  OffsetType
186  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
187  const Kokkos::View<const GO*, DT>& gblColInds,
188  const Kokkos::View<const OffsetType*, DT>& ptr,
189  const LocalMap<LO, GO, DT>& lclColMap,
190  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
191  {
192  using Impl::ConvertColumnIndicesFromGlobalToLocal;
193  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
194  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
195  }
196 
197  template<class ViewType, class LO>
198  class MaxDifference {
199  public:
200  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
201 
202  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
203  dst = 0;
204  }
205 
206  KOKKOS_INLINE_FUNCTION void
207  join (LO& dst, const LO& src) const
208  {
209  dst = (src > dst) ? src : dst;
210  }
211 
212  KOKKOS_INLINE_FUNCTION void
213  operator () (const LO lclRow, LO& maxNumEnt) const
214  {
215  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
216  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
217  }
218  private:
219  typename ViewType::const_type ptr_;
220  };
221 
222  template<class ViewType, class LO>
223  typename ViewType::non_const_value_type
224  maxDifference (const char kernelLabel[],
225  const ViewType& ptr,
226  const LO lclNumRows)
227  {
228  if (lclNumRows == 0) {
229  // mfh 07 May 2018: Weirdly, I need this special case,
230  // otherwise I get the wrong answer.
231  return static_cast<LO> (0);
232  }
233  else {
234  using execution_space = typename ViewType::execution_space;
235  using range_type = Kokkos::RangePolicy<execution_space, LO>;
236  LO theMaxNumEnt {0};
237  Kokkos::parallel_reduce (kernelLabel,
238  range_type (0, lclNumRows),
239  MaxDifference<ViewType, LO> (ptr),
240  theMaxNumEnt);
241  return theMaxNumEnt;
242  }
243  }
244 
245  } // namespace Details
246 
247  template <class LocalOrdinal, class GlobalOrdinal, class Node>
248  bool
249  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
250  getDebug() {
251  return Details::Behavior::debug("CrsGraph");
252  }
253 
254  template <class LocalOrdinal, class GlobalOrdinal, class Node>
255  bool
256  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
257  getVerbose() {
258  return Details::Behavior::verbose("CrsGraph");
259  }
260 
// Constructor: row Map plus one allocation hint applied to every row.
//
// Stores rowMap and records maxNumEntriesPerRow in numAllocForAllRows_.
// The hint is checked against Teuchos::OrdinalTraits<size_t>::invalid()
// (reported as std::invalid_argument), then resumeFill(params) is called.
//
// NOTE(review): the listing numbering jumps from 278 to 280 just before
// the closing brace, so one statement may be missing here -- confirm
// against the real file.
261  template <class LocalOrdinal, class GlobalOrdinal, class Node>
262  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
263  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
264  const size_t maxNumEntriesPerRow,
265  const Teuchos::RCP<Teuchos::ParameterList>& params) :
266  dist_object_type (rowMap)
267  , rowMap_ (rowMap)
268  , numAllocForAllRows_ (maxNumEntriesPerRow)
269  {
270  const char tfecfFuncName[] =
271  "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
272  staticAssertions ();
273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
275  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
276  "a valid size_t value, which in this case means it must not be "
277  "Teuchos::OrdinalTraits<size_t>::invalid().");
278  resumeFill (params);
280  }
281 
// Constructor: row Map, column Map, and one allocation hint applied to
// every row.
//
// Same checks as the (rowMap, maxNumEntriesPerRow, params) constructor,
// but additionally stores colMap in colMap_.
//
// NOTE(review): listing lines 283 (between the template header and the
// constructor name; presumably the class qualifier) and 302 (just before
// the closing brace) are absent from this listing -- confirm against the
// real file.
282  template <class LocalOrdinal, class GlobalOrdinal, class Node>
284  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
285  const Teuchos::RCP<const map_type>& colMap,
286  const size_t maxNumEntriesPerRow,
287  const Teuchos::RCP<Teuchos::ParameterList>& params) :
288  dist_object_type (rowMap)
289  , rowMap_ (rowMap)
290  , colMap_ (colMap)
291  , numAllocForAllRows_ (maxNumEntriesPerRow)
292  {
293  const char tfecfFuncName[] =
294  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
295  staticAssertions ();
296  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
297  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
298  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
299  "a valid size_t value, which in this case means it must not be "
300  "Teuchos::OrdinalTraits<size_t>::invalid().");
301  resumeFill (params);
303  }
304 
305 
// Constructor: row Map plus a per-row allocation hint given as a
// Teuchos::ArrayView.
//
// numEntPerRow must have exactly as many entries as the row Map has
// local elements (zero if rowMap is null); otherwise std::invalid_argument
// is reported.  When debug_ is set, each entry is also checked against
// the size_t "invalid" sentinel.  The hints are deep-copied into a newly
// allocated View, which is then assigned to k_numAllocPerRow_.
//
// NOTE(review): listing lines 307 (presumably the class qualifier) and
// 356 (just before the closing brace) are absent from this listing --
// confirm against the real file.
306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
308  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
309  const Teuchos::ArrayView<const size_t>& numEntPerRow,
310  const Teuchos::RCP<Teuchos::ParameterList>& params) :
311  dist_object_type (rowMap)
312  , rowMap_ (rowMap)
313  , numAllocForAllRows_ (0)
314  {
315  const char tfecfFuncName[] =
316  "CrsGraph(rowMap,numEntPerRow,params): ";
317  staticAssertions ();
318 
// A null row Map is treated as having zero local rows.
319  const size_t lclNumRows = rowMap.is_null () ?
320  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
322  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
323  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
324  << " != the local number of rows " << lclNumRows << " as specified by "
325  "the input row Map.");
326 
// The per-entry validity scan only runs in debug mode.
327  if (debug_) {
328  for (size_t r = 0; r < lclNumRows; ++r) {
329  const size_t curRowCount = numEntPerRow[r];
330  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
331  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
332  std::invalid_argument, "numEntPerRow(" << r << ") "
333  "specifies an invalid number of entries "
334  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
335  }
336  }
337 
338  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
339  // The latter is a const View, so we have to copy into a nonconst
340  // View first, then assign.
341  typedef decltype (k_numAllocPerRow_) out_view_type;
342  typedef typename out_view_type::non_const_type nc_view_type;
343  typedef Kokkos::View<const size_t*,
344  typename nc_view_type::array_layout,
345  Kokkos::HostSpace,
346  Kokkos::MemoryUnmanaged> in_view_type;
// Unmanaged view wrapping the caller's array; no ownership is taken.
347  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
348  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
349  lclNumRows);
350  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
351  using exec_space = typename nc_view_type::execution_space;
352  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
353  k_numAllocPerRow_ = numAllocPerRowOut;
354 
355  resumeFill (params);
357  }
358 
359 
360 
// Constructor: row Map plus a per-row allocation hint given as a
// Kokkos::DualView.
//
// k_numAllocPerRow_ is initialized directly from numEntPerRow.view_host()
// (no copy is made in this constructor).  The extent of numEntPerRow
// must equal the row Map's local element count (zero if rowMap is null);
// in debug mode each entry is also checked against the size_t "invalid"
// sentinel.
//
// NOTE(review): listing lines 362 (presumably the class qualifier) and
// 395 (just before the closing brace) are absent from this listing --
// confirm against the real file.
361  template <class LocalOrdinal, class GlobalOrdinal, class Node>
363  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
364  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
365  const Teuchos::RCP<Teuchos::ParameterList>& params) :
366  dist_object_type (rowMap)
367  , rowMap_ (rowMap)
368  , k_numAllocPerRow_ (numEntPerRow.view_host())
369  , numAllocForAllRows_ (0)
370  {
371  const char tfecfFuncName[] =
372  "CrsGraph(rowMap,numEntPerRow,params): ";
373  staticAssertions ();
374 
// A null row Map is treated as having zero local rows.
375  const size_t lclNumRows = rowMap.is_null () ?
376  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
377  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
378  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
379  std::invalid_argument, "numEntPerRow has length " <<
380  numEntPerRow.extent (0) << " != the local number of rows " <<
381  lclNumRows << " as specified by " "the input row Map.");
382 
// The per-entry validity scan only runs in debug mode.
383  if (debug_) {
384  for (size_t r = 0; r < lclNumRows; ++r) {
385  const size_t curRowCount = numEntPerRow.view_host()(r);
386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
387  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
388  std::invalid_argument, "numEntPerRow(" << r << ") "
389  "specifies an invalid number of entries "
390  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
391  }
392  }
393 
394  resumeFill (params);
396  }
397 
398 
// Constructor: row Map, column Map, and a per-row allocation hint given
// as a Kokkos::DualView.
//
// Same as the (rowMap, DualView numEntPerRow, params) constructor, but
// additionally stores colMap in colMap_.
//
// NOTE(review): listing lines 400 (presumably the class qualifier) and
// 435 (just before the closing brace) are absent from this listing --
// confirm against the real file.
399  template <class LocalOrdinal, class GlobalOrdinal, class Node>
401  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
402  const Teuchos::RCP<const map_type>& colMap,
403  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
404  const Teuchos::RCP<Teuchos::ParameterList>& params) :
405  dist_object_type (rowMap)
406  , rowMap_ (rowMap)
407  , colMap_ (colMap)
408  , k_numAllocPerRow_ (numEntPerRow.view_host())
409  , numAllocForAllRows_ (0)
410  {
411  const char tfecfFuncName[] =
412  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
413  staticAssertions ();
414 
// A null row Map is treated as having zero local rows.
415  const size_t lclNumRows = rowMap.is_null () ?
416  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
418  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
419  std::invalid_argument, "numEntPerRow has length " <<
420  numEntPerRow.extent (0) << " != the local number of rows " <<
421  lclNumRows << " as specified by " "the input row Map.");
422 
// The per-entry validity scan only runs in debug mode.
423  if (debug_) {
424  for (size_t r = 0; r < lclNumRows; ++r) {
425  const size_t curRowCount = numEntPerRow.view_host()(r);
426  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
427  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
428  std::invalid_argument, "numEntPerRow(" << r << ") "
429  "specifies an invalid number of entries "
430  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
431  }
432  }
433 
434  resumeFill (params);
436  }
437 
438 
// Constructor: row Map, column Map, and a per-row allocation hint given
// as a Teuchos::ArrayView.
//
// Same as the (rowMap, ArrayView numEntPerRow, params) constructor, but
// additionally stores colMap in colMap_.  The hints are deep-copied into
// a newly allocated View that is then assigned to k_numAllocPerRow_.
//
// NOTE(review): listing lines 440 (presumably the class qualifier) and
// 491 (just before the closing brace) are absent from this listing --
// confirm against the real file.
439  template <class LocalOrdinal, class GlobalOrdinal, class Node>
441  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
442  const Teuchos::RCP<const map_type>& colMap,
443  const Teuchos::ArrayView<const size_t>& numEntPerRow,
444  const Teuchos::RCP<Teuchos::ParameterList>& params) :
445  dist_object_type (rowMap)
446  , rowMap_ (rowMap)
447  , colMap_ (colMap)
448  , numAllocForAllRows_ (0)
449  {
450  const char tfecfFuncName[] =
451  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
452  staticAssertions ();
453 
// A null row Map is treated as having zero local rows.
454  const size_t lclNumRows = rowMap.is_null () ?
455  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
457  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
458  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
459  << " != the local number of rows " << lclNumRows << " as specified by "
460  "the input row Map.");
461 
// The per-entry validity scan only runs in debug mode.
462  if (debug_) {
463  for (size_t r = 0; r < lclNumRows; ++r) {
464  const size_t curRowCount = numEntPerRow[r];
465  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
466  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
467  std::invalid_argument, "numEntPerRow(" << r << ") "
468  "specifies an invalid number of entries "
469  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
470  }
471  }
472 
473  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
474  // The latter is a const View, so we have to copy into a nonconst
475  // View first, then assign.
476  typedef decltype (k_numAllocPerRow_) out_view_type;
477  typedef typename out_view_type::non_const_type nc_view_type;
478  typedef Kokkos::View<const size_t*,
479  typename nc_view_type::array_layout,
480  Kokkos::HostSpace,
481  Kokkos::MemoryUnmanaged> in_view_type;
// Unmanaged view wrapping the caller's array; no ownership is taken.
482  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
483  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
484  lclNumRows);
485  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
486  using exec_space = typename nc_view_type::execution_space;
487  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
488  k_numAllocPerRow_ = numAllocPerRowOut;
489 
490  resumeFill (params);
492  }
493 
494 
// Constructor that builds a graph over the leading rows of an existing
// graph ("originalGraph") without copying its data.
//
// The column Map, allocation hint, storage status, and index-state flags
// are copied from originalGraph.  Row offsets are subviews covering
// entries [0, numRows+1) of originalGraph's unpacked and packed device
// offsets, and the index arrays are constructed from originalGraph's
// arrays over the range [0, numNonZeros), where numNonZeros is the
// packed host row offset at position numRows.
//
// NOTE(review): listing lines 496-497 (presumably the class qualifier and
// the first parameter, originalGraph) and 527 (just before the closing
// brace) are absent from this listing -- confirm against the real file.
// NOTE(review): rowMap is dereferenced without a null check here, and the
// local row count is narrowed to int -- confirm both are intended.
495  template <class LocalOrdinal, class GlobalOrdinal, class Node>
498  const Teuchos::RCP<const map_type>& rowMap,
499  const Teuchos::RCP<Teuchos::ParameterList>& params) :
500  dist_object_type (rowMap)
501  , rowMap_(rowMap)
502  , colMap_(originalGraph.colMap_)
503  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
504  , storageStatus_(originalGraph.storageStatus_)
505  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
506  , indicesAreLocal_(originalGraph.indicesAreLocal_)
507  , indicesAreSorted_(originalGraph.indicesAreSorted_)
508  {
509  staticAssertions();
510 
511  int numRows = rowMap->getLocalNumElements();
// Offset one past the last kept row gives the number of kept entries.
512  size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
513  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
514 
515 
516  this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
517  this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
518 
// Only the index array matching the current index state is set up.
519  if (indicesAreLocal_) {
520  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
521  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
522  }
523  else {
524  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
525  }
526 
528  }
529 
// Constructor: row Map, column Map, and caller-supplied row offsets and
// local column indices as Kokkos Views.
//
// The graph starts with packed 1-D storage and local, allocated indices.
// Indices are assumed sorted unless params contains a bool parameter
// "sorted" set to false.  The arrays are handed to setAllIndices().
//
// NOTE(review): listing lines 531 (presumably the class qualifier) and
// 554 (just before the closing brace) are absent from this listing --
// confirm against the real file.
530  template <class LocalOrdinal, class GlobalOrdinal, class Node>
532  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
533  const Teuchos::RCP<const map_type>& colMap,
534  const typename local_graph_device_type::row_map_type& rowPointers,
535  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
536  const Teuchos::RCP<Teuchos::ParameterList>& params) :
537  dist_object_type (rowMap)
538  , rowMap_(rowMap)
539  , colMap_(colMap)
540  , numAllocForAllRows_(0)
541  , storageStatus_(Details::STORAGE_1D_PACKED)
542  , indicesAreAllocated_(true)
543  , indicesAreLocal_(true)
544  {
545  staticAssertions ();
// "sorted" defaults to true when params is null or lacks the key.
546  if (! params.is_null() && params->isParameter("sorted") &&
547  ! params->get<bool>("sorted")) {
548  indicesAreSorted_ = false;
549  }
550  else {
551  indicesAreSorted_ = true;
552  }
553  setAllIndices (rowPointers, columnIndices);
555  }
556 
// Constructor: row Map, column Map, and caller-supplied row offsets and
// local column indices as Teuchos::ArrayRCP.
//
// Mirrors the Kokkos::View overload: packed 1-D storage, local allocated
// indices, "sorted" read from params (default true), then setAllIndices().
//
// NOTE(review): listing lines 558 (presumably the class qualifier) and
// 581 (just before the closing brace) are absent from this listing --
// confirm against the real file.
557  template <class LocalOrdinal, class GlobalOrdinal, class Node>
559  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
560  const Teuchos::RCP<const map_type>& colMap,
561  const Teuchos::ArrayRCP<size_t>& rowPointers,
562  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
563  const Teuchos::RCP<Teuchos::ParameterList>& params) :
564  dist_object_type (rowMap)
565  , rowMap_ (rowMap)
566  , colMap_ (colMap)
567  , numAllocForAllRows_ (0)
568  , storageStatus_ (Details::STORAGE_1D_PACKED)
569  , indicesAreAllocated_ (true)
570  , indicesAreLocal_ (true)
571  {
572  staticAssertions ();
// "sorted" defaults to true when params is null or lacks the key.
573  if (! params.is_null() && params->isParameter("sorted") &&
574  ! params->get<bool>("sorted")) {
575  indicesAreSorted_ = false;
576  }
577  else {
578  indicesAreSorted_ = true;
579  }
580  setAllIndices (rowPointers, columnIndices);
582  }
583 
// Constructor: row Map, column Map, and a local graph.
//
// Delegates to the (local graph, rowMap, colMap, domainMap, rangeMap,
// params) constructor, passing Teuchos::null for both the domain Map and
// the range Map.
//
// NOTE(review): listing line 585 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
584  template <class LocalOrdinal, class GlobalOrdinal, class Node>
586  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
587  const Teuchos::RCP<const map_type>& colMap,
588  const local_graph_device_type& k_local_graph_,
589  const Teuchos::RCP<Teuchos::ParameterList>& params)
590  : CrsGraph (k_local_graph_,
591  rowMap,
592  colMap,
593  Teuchos::null,
594  Teuchos::null,
595  params)
596  {}
597 
// Constructor: local graph plus row, column, domain, and range Maps.
//
// Requires a nonnull column Map and a local graph whose row count equals
// the row Map's local element count (both reported as std::runtime_error).
// A null domainMap or rangeMap is replaced by the row Map.  The
// constructor builds the Import/Export via makeImportExport(), adopts
// the local graph's entries and row offsets, optionally computes global
// constants (params entry "compute global constants", default true), and
// marks the graph fill complete.
//
// NOTE(review): listing lines 599 (presumably the class qualifier) and
// 653 (between the lclIndsPacked_wdv assignment and setRowPtrs) are
// absent from this listing -- confirm against the real file.
// NOTE(review): unlike the allocation-hint constructors, rowMap is
// dereferenced here without a null check -- confirm that is intended.
598  template <class LocalOrdinal, class GlobalOrdinal, class Node>
600  CrsGraph (const local_graph_device_type& k_local_graph_,
601  const Teuchos::RCP<const map_type>& rowMap,
602  const Teuchos::RCP<const map_type>& colMap,
603  const Teuchos::RCP<const map_type>& domainMap,
604  const Teuchos::RCP<const map_type>& rangeMap,
605  const Teuchos::RCP<Teuchos::ParameterList>& params)
606  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
607  , rowMap_ (rowMap)
608  , colMap_ (colMap)
609  , numAllocForAllRows_ (0)
610  , storageStatus_ (Details::STORAGE_1D_PACKED)
611  , indicesAreAllocated_ (true)
612  , indicesAreLocal_ (true)
613  {
614  staticAssertions();
// No trailing ": " here; each message below begins with ": " instead.
615  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
616 
617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
618  colMap.is_null (), std::runtime_error,
619  ": The input column Map must be nonnull.");
620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621  k_local_graph_.numRows () != rowMap->getLocalNumElements (),
622  std::runtime_error,
623  ": The input row Map and the input local graph need to have the same "
624  "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
625  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
626  << " row(s).");
627 
628  // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
629  // rowMap_->getLocalNumElements(), but it doesn't have to.
630  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631  // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
632  // ": The input row Map and the input local graph need to have the same "
633  // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
634  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
636  lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
637  ": cannot have 1D data structures allocated.");
638 
// "sorted" defaults to true when params is null or lacks the key.
639  if(! params.is_null() && params->isParameter("sorted") &&
640  ! params->get<bool>("sorted")) {
641  indicesAreSorted_ = false;
642  }
643  else {
644  indicesAreSorted_ = true;
645  }
646 
// Null domain/range Maps fall back to the row Map.
647  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
648  rangeMap .is_null() ? rowMap_ : rangeMap);
649  Teuchos::Array<int> remotePIDs (0); // unused output argument
650  this->makeImportExport (remotePIDs, false);
651 
652  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
654  this->setRowPtrs(k_local_graph_.row_map);
655 
656  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
657 
// A null params behaves as if "compute global constants" were true.
658  const bool callComputeGlobalConstants = params.get () == nullptr ||
659  params->get ("compute global constants", true);
660 
661  if (callComputeGlobalConstants) {
662  this->computeGlobalConstants ();
663  }
664  this->fillComplete_ = true;
665  this->checkInternalState ();
666  }
667 
// Constructor: local graph plus all four Maps and precomputed
// Import/Export objects.
//
// Null rangeMap/domainMap fall back to rowMap.  The importer and exporter
// are stored as given.  A nonnull column Map is required
// (std::runtime_error).  The local graph's entries and row offsets are
// adopted, "sorted" is read from params (default true), global constants
// are optionally computed ("compute global constants", default true), and
// the graph is marked fill complete.
//
// NOTE(review): listing lines 669-670 (presumably the class qualifier and
// the first parameter, which the body calls lclGraph), 699, and 719 are
// absent from this listing -- confirm against the real file.
668  template <class LocalOrdinal, class GlobalOrdinal, class Node>
671  const Teuchos::RCP<const map_type>& rowMap,
672  const Teuchos::RCP<const map_type>& colMap,
673  const Teuchos::RCP<const map_type>& domainMap,
674  const Teuchos::RCP<const map_type>& rangeMap,
675  const Teuchos::RCP<const import_type>& importer,
676  const Teuchos::RCP<const export_type>& exporter,
677  const Teuchos::RCP<Teuchos::ParameterList>& params) :
678  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
679  rowMap_ (rowMap),
680  colMap_ (colMap),
681  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
682  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
683  importer_ (importer),
684  exporter_ (exporter),
685  numAllocForAllRows_ (0),
686  storageStatus_ (Details::STORAGE_1D_PACKED),
687  indicesAreAllocated_ (true),
688  indicesAreLocal_ (true)
689  {
690  staticAssertions();
691  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
692  "Map,Map,Map,Map,Import,Export,params): ";
693 
694  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
695  (colMap.is_null (), std::runtime_error,
696  "The input column Map must be nonnull.");
697 
698  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
700  setRowPtrs(lclGraph.row_map);
701 
702  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
703 
// "sorted" defaults to true when params is null or lacks the key.
704  if (! params.is_null() && params->isParameter("sorted") &&
705  ! params->get<bool>("sorted")) {
706  indicesAreSorted_ = false;
707  }
708  else {
709  indicesAreSorted_ = true;
710  }
711 
// A null params behaves as if "compute global constants" were true.
712  const bool callComputeGlobalConstants =
713  params.get () == nullptr ||
714  params->get ("compute global constants", true);
715  if (callComputeGlobalConstants) {
716  this->computeGlobalConstants ();
717  }
718  fillComplete_ = true;
720  }
721 
// Constructor: row offsets and wrapped local column indices, plus all
// four Maps and precomputed Import/Export objects.
//
// Same flow as the local-graph overload with Import/Export, except that
// the row offsets and column indices arrive as separate arguments:
// columnIndices is assigned to lclIndsPacked_wdv and rowPointers is
// passed to setRowPtrs().
//
// NOTE(review): listing lines 723 (presumably the class qualifier), 754,
// and 774 are absent from this listing -- confirm against the real file.
// NOTE(review): the two adjacent literals forming tfecfFuncName join as
// "...local_inds_wdv_typeMap,Map,..." with no comma between the type name
// and "Map" -- looks like a typo in the message prefix; confirm.
722  template <class LocalOrdinal, class GlobalOrdinal, class Node>
724  CrsGraph (const row_ptrs_device_view_type& rowPointers,
725  const local_inds_wdv_type& columnIndices,
726  const Teuchos::RCP<const map_type>& rowMap,
727  const Teuchos::RCP<const map_type>& colMap,
728  const Teuchos::RCP<const map_type>& domainMap,
729  const Teuchos::RCP<const map_type>& rangeMap,
730  const Teuchos::RCP<const import_type>& importer,
731  const Teuchos::RCP<const export_type>& exporter,
732  const Teuchos::RCP<Teuchos::ParameterList>& params) :
733  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
734  rowMap_ (rowMap),
735  colMap_ (colMap),
736  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
737  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
738  importer_ (importer),
739  exporter_ (exporter),
740  numAllocForAllRows_ (0),
741  storageStatus_ (Details::STORAGE_1D_PACKED),
742  indicesAreAllocated_ (true),
743  indicesAreLocal_ (true)
744  {
745  staticAssertions();
746  const char tfecfFuncName[] = "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
747  "Map,Map,Map,Map,Import,Export,params): ";
748 
749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
750  (colMap.is_null (), std::runtime_error,
751  "The input column Map must be nonnull.");
752 
753  lclIndsPacked_wdv = columnIndices;
755  setRowPtrs(rowPointers);
756 
757  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
758 
// "sorted" defaults to true when params is null or lacks the key.
759  if (! params.is_null() && params->isParameter("sorted") &&
760  ! params->get<bool>("sorted")) {
761  indicesAreSorted_ = false;
762  }
763  else {
764  indicesAreSorted_ = true;
765  }
766 
// A null params behaves as if "compute global constants" were true.
767  const bool callComputeGlobalConstants =
768  params.get () == nullptr ||
769  params->get ("compute global constants", true);
770  if (callComputeGlobalConstants) {
771  this->computeGlobalConstants ();
772  }
773  fillComplete_ = true;
775  }
776 
// Build and return a parameter list named "Tpetra::CrsGraph" holding two
// sublists, "Import" and "Export".
//
// The "Import" sublist is populated by constructing a Distributor on the
// row Map's communicator; the "Export" entry is set from that same
// sublist's contents.
//
// NOTE(review): listing lines 779-780 (presumably the class qualifier and
// the function name; setParameterList() calls getValidParameters()) are
// absent from this listing -- confirm against the real file.
777  template <class LocalOrdinal, class GlobalOrdinal, class Node>
778  Teuchos::RCP<const Teuchos::ParameterList>
781  {
782  using Teuchos::RCP;
783  using Teuchos::ParameterList;
784  using Teuchos::parameterList;
785 
786  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
787 
788  // Make a sublist for the Import.
789  RCP<ParameterList> importSublist = parameterList ("Import");
790 
791  // FIXME (mfh 02 Apr 2012) We should really have the Import and
792  // Export objects fill in these lists. However, we don't want to
793  // create an Import or Export unless we need them. For now, we
794  // know that the Import and Export just pass the list directly to
795  // their Distributor, so we can create a Distributor here
796  // (Distributor's constructor is a lightweight operation) and have
797  // it fill in the list.
798 
799  // Fill in Distributor default parameters by creating a
800  // Distributor and asking it to do the work.
801  Distributor distributor (rowMap_->getComm (), importSublist);
802  params->set ("Import", *importSublist, "How the Import performs communication.");
803 
804  // Make a sublist for the Export. For now, it's a clone of the
805  // Import sublist. It's not a shallow copy, though, since we
806  // might like the Import to do communication differently than the
807  // Export.
808  params->set ("Export", *importSublist, "How the Export performs communication.");
809 
810  return params;
811  }
812 
// Validate params against getValidParameters(), filling in defaults, and
// then store it with setMyParamList().
//
// NOTE(review): params is dereferenced without a null check -- confirm
// that callers never pass a null RCP.
// NOTE(review): listing line 815 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  void
816  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
817  {
818  Teuchos::RCP<const Teuchos::ParameterList> validParams =
819  getValidParameters ();
820  params->validateParametersAndSetDefaults (*validParams);
821  this->setMyParamList (params);
822  }
823 
// Returns the row Map's global number of elements.
//
// NOTE(review): listing lines 825-827 (return type, class qualifier, and
// function name) are absent from this listing; presumably this is
// getGlobalNumRows() -- confirm against the real file.
824  template <class LocalOrdinal, class GlobalOrdinal, class Node>
828  {
829  return rowMap_->getGlobalNumElements ();
830  }
831 
// Returns the domain Map's global number of elements.
//
// Reports std::runtime_error if the graph is not fill complete or has no
// domain Map.
//
// NOTE(review): listing lines 833-835 (return type, class qualifier, and
// function name) are absent from this listing; the name is taken from
// tfecfFuncName -- confirm against the real file.
832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
836  {
837  const char tfecfFuncName[] = "getGlobalNumCols: ";
838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
839  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
840  "The graph does not have a domain Map. You may not call this method in "
841  "that case.");
842  return getDomainMap ()->getGlobalNumElements ();
843  }
844 
845 
// Returns the row Map's local number of elements, or zero if the row Map
// is null.
//
// NOTE(review): listing lines 848-849 (class qualifier and function name)
// are absent from this listing; presumably this is getLocalNumRows() --
// confirm against the real file.
846  template <class LocalOrdinal, class GlobalOrdinal, class Node>
847  size_t
850  {
851  return this->rowMap_.is_null () ?
852  static_cast<size_t> (0) :
853  this->rowMap_->getLocalNumElements ();
854  }
855 
856 
// Returns the column Map's local number of elements.
//
// Reports std::runtime_error if the graph has no column Map.  The null
// test in the return statement repeats the hasColMap() condition, which
// is itself just a null test on colMap_.
//
// NOTE(review): listing lines 859-860 (class qualifier and function name)
// are absent from this listing; the name is taken from tfecfFuncName --
// confirm against the real file.
857  template <class LocalOrdinal, class GlobalOrdinal, class Node>
858  size_t
861  {
862  const char tfecfFuncName[] = "getLocalNumCols: ";
863  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
864  ! hasColMap (), std::runtime_error,
865  "The graph does not have a column Map. You may not call this method "
866  "unless the graph has a column Map. This requires either that a custom "
867  "column Map was given to the constructor, or that fillComplete() has "
868  "been called.");
869  return colMap_.is_null () ? static_cast<size_t> (0) :
870  colMap_->getLocalNumElements ();
871  }
872 
873 
874 
// Returns the stored row Map (rowMap_); may be null.
// NOTE(review): listing line 877 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
875  template <class LocalOrdinal, class GlobalOrdinal, class Node>
876  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
878  getRowMap () const
879  {
880  return rowMap_;
881  }
882 
// Returns the stored column Map (colMap_); may be null.
// NOTE(review): listing line 885 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
883  template <class LocalOrdinal, class GlobalOrdinal, class Node>
884  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
886  getColMap () const
887  {
888  return colMap_;
889  }
890 
// Returns the stored domain Map (domainMap_).
// NOTE(review): listing lines 893-894 (class qualifier and function name)
// are absent from this listing; presumably this is getDomainMap() --
// confirm against the real file.
891  template <class LocalOrdinal, class GlobalOrdinal, class Node>
892  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
895  {
896  return domainMap_;
897  }
898 
// Returns the stored range Map (rangeMap_).
// NOTE(review): listing line 901 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
899  template <class LocalOrdinal, class GlobalOrdinal, class Node>
900  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
902  getRangeMap () const
903  {
904  return rangeMap_;
905  }
906 
// Returns the stored Import object (importer_); may be null.
// NOTE(review): listing line 909 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
907  template <class LocalOrdinal, class GlobalOrdinal, class Node>
908  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
910  getImporter () const
911  {
912  return importer_;
913  }
914 
// Returns the stored Export object (exporter_); may be null.
// NOTE(review): listing line 917 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
915  template <class LocalOrdinal, class GlobalOrdinal, class Node>
916  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
918  getExporter () const
919  {
920  return exporter_;
921  }
922 
// True exactly when colMap_ is nonnull.
// NOTE(review): listing line 925 (presumably the class qualifier) is
// absent from this listing -- confirm against the real file.
923  template <class LocalOrdinal, class GlobalOrdinal, class Node>
924  bool
926  hasColMap () const
927  {
928  return ! colMap_.is_null ();
929  }
930 
// True when indices are allocated, k_numRowEntries_ has zero extent, and
// the graph has at least one local row.
//
// NOTE(review): listing lines 933-934 (class qualifier and function name)
// are absent from this listing; presumably this is isStorageOptimized()
// -- confirm against the real file.
931  template <class LocalOrdinal, class GlobalOrdinal, class Node>
932  bool
935  {
936  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
937  // getLocalNumRows() is zero?
938 
939  const bool isOpt = indicesAreAllocated_ &&
940  k_numRowEntries_.extent (0) == 0 &&
941  getLocalNumRows () > 0;
942 
943  return isOpt;
944  }
945 
946 
// Returns the cached global entry count (globalNumEntries_).
//
// Reports std::logic_error if haveGlobalConstants_ is false.
//
// NOTE(review): listing lines 948-950 (return type, class qualifier, and
// function name) are absent from this listing; the name is taken from
// tfecfFuncName -- confirm against the real file.
947  template <class LocalOrdinal, class GlobalOrdinal, class Node>
951  {
952  const char tfecfFuncName[] = "getGlobalNumEntries: ";
953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
954  (! this->haveGlobalConstants_, std::logic_error,
955  "The graph does not have global constants computed, "
956  "but the user has requested them.");
957 
958  return globalNumEntries_;
959  }
960 
961 
// Returns the number of column-index entries stored on the calling process.
//
// Cases, in the order the code tests them:
//   - indices not allocated, or zero local rows: returns 0.
//   - k_numRowEntries_ has zero length (packed storage): returns 0 if the
//     packed row offsets hold fewer than lclNumRows + 1 entries, otherwise
//     the length of lclIndsPacked_wdv.
//   - k_numRowEntries_ is populated: returns the sum of its first
//     min(lclNumRows, k_numRowEntries_.extent(0)) entries.
//
// In debug_ mode the packed branch also checks that the last packed host row
// offset equals lclIndsPacked_wdv.extent(0), and reports std::logic_error
// otherwise.
962  template <class LocalOrdinal, class GlobalOrdinal, class Node>
963  size_t
966  {
967  const char tfecfFuncName[] = "getLocalNumEntries: ";
968  typedef LocalOrdinal LO;
969 
970  if (this->indicesAreAllocated_) {
971  const LO lclNumRows = this->getLocalNumRows ();
972  if (lclNumRows == 0) {
973  return static_cast<size_t> (0);
974  }
975  else {
976  // Avoid the "*this capture" issue by creating a local Kokkos::View.
977  auto numEntPerRow = this->k_numRowEntries_;
978  const LO numNumEntPerRow = numEntPerRow.extent (0);
979  if (numNumEntPerRow == 0) {
      // Row offsets too short to describe lclNumRows rows: report no entries.
980  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
981  static_cast<LO> (lclNumRows + 1)) {
982  return static_cast<size_t> (0);
983  }
984  else {
985  // indices are allocated and k_numRowEntries_ is not allocated,
986  // so we have packed storage and the length of lclIndsPacked_wdv
987  // must be the number of local entries.
988  if(debug_) {
989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
990  (this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
991  "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
992  }
993  return lclIndsPacked_wdv.extent(0);
994  }
995  }
996  else { // k_numRowEntries_ is populated
997  // k_numRowEntries_ is actually a host View, so we run
998  // the sum in its native execution space. This also means
999  // that we can use explicit capture (which could perhaps
1000  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1001  // any CUDA build issues with trying to run a __device__ -
1002  // only function on host.
1003  typedef typename num_row_entries_type::execution_space
1004  host_exec_space;
1005  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1006 
      // Clamp the loop bound so the reduction never indexes past either the
      // row count or the length of the counts View.
1007  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1008  lclNumRows :
1009  numNumEntPerRow;
1010  size_t nodeNumEnt = 0;
      // NOTE(review): the kernel label below says "getNumNodeEntries", which
      // differs from this function's name as given in tfecfFuncName.
1011  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1012  range_type (0, upperLoopBound),
1013  [=] (const LO& k, size_t& lclSum) {
1014  lclSum += numEntPerRow(k);
1015  }, nodeNumEnt);
1016  return nodeNumEnt;
1017  }
1018  }
1019  }
1020  else { // nothing allocated on this process, so no entries
1021  return static_cast<size_t> (0);
1022  }
1023  }
1024 
// Returns the cached value globalMaxNumRowEntries_.
// The value is only meaningful once global constants have been computed:
// if haveGlobalConstants_ is false, the check below reports std::logic_error
// through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC instead of returning.
1025  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1029  {
1030  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1031  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1032  (! this->haveGlobalConstants_, std::logic_error,
1033  "The graph does not have global constants computed, "
1034  "but the user has requested them.");
1035 
1036  return globalMaxNumRowEntries_;
1037  }
1038 
// Returns the cached value nodeMaxNumRowEntries_.
// Unlike the global-constant accessors, no "have constants" check is made
// here, so the caller gets whatever value is currently stored.
1039  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040  size_t
1043  {
1044  return nodeMaxNumRowEntries_;
1045  }
1046 
// Returns the fillComplete_ flag unchanged.
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  bool
1051  {
1052  return fillComplete_;
1053  }
1054 
// Returns the logical negation of the fillComplete_ flag.
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  bool
1059  {
1060  return ! fillComplete_;
1061  }
1062 
1063 
// Returns the indicesAreLocal_ flag, which allocateIndices sets to
// (lg == LocalIndices).
1064  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1065  bool
1068  {
1069  return indicesAreLocal_;
1070  }
1071 
// Returns the indicesAreGlobal_ flag, which allocateIndices sets to
// (lg == GlobalIndices).
1072  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1073  bool
1076  {
1077  return indicesAreGlobal_;
1078  }
1079 
// Returns the length of the column-index array currently allocated on this
// process.
//
// Cases, in the order the code tests them:
//   - indices not allocated: OrdinalTraits<size_t>::invalid().
//   - zero local rows: 0.
//   - STORAGE_1D_PACKED: 0 if the packed row offsets hold fewer than
//     lclNumRows + 1 entries; otherwise the length of lclIndsPacked_wdv when
//     locally indexed, or of gblInds_wdv when not.
//   - STORAGE_1D_UNPACKED: 0 if the unpacked row offsets are empty; otherwise
//     the length of lclIndsUnpacked_wdv when locally indexed, or of
//     gblInds_wdv when not.
//   - any other storage status: 0.
1080  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1081  size_t
1084  {
1085  typedef LocalOrdinal LO;
1086 
1087  if (this->indicesAreAllocated_) {
1088  const LO lclNumRows = this->getLocalNumRows ();
1089  if (lclNumRows == 0) {
1090  return static_cast<size_t> (0);
1091  }
1092  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1093  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
1094  static_cast<LO> (lclNumRows + 1)) {
1095  return static_cast<size_t> (0);
1096  }
1097  else {
1098  if(this->isLocallyIndexed())
1099  return lclIndsPacked_wdv.extent(0);
1100  else
1101  return gblInds_wdv.extent(0);
1102  }
1103  }
1104  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1105  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1106  if (rowPtrsUnpacked_host.extent (0) == 0) {
1107  return static_cast<size_t> (0);
1108  }
1109  else {
1110  if(this->isLocallyIndexed())
1111  return lclIndsUnpacked_wdv.extent(0);
1112  else
1113  return gblInds_wdv.extent(0);
1114  }
1115  }
1116  else {
1117  return static_cast<size_t> (0);
1118  }
1119  }
1120  else {
    // Nothing allocated yet: signal that with the "invalid" sentinel rather than 0.
1121  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1122  }
1123  }
1124 
// Returns the communicator of the row Map.
// If rowMap_ is null, returns Teuchos::null instead of dereferencing it.
1125  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126  Teuchos::RCP<const Teuchos::Comm<int> >
1128  getComm () const
1129  {
1130  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1131  }
1132 
// Returns the index base reported by the row Map.
// rowMap_ is dereferenced unconditionally, so it must be non-null when this
// is called.
1133  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1134  GlobalOrdinal
1137  {
1138  return rowMap_->getIndexBase ();
1139  }
1140 
// Returns the indicesAreAllocated_ flag, which allocateIndices sets to true
// once the index storage exists.
1141  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1142  bool
1144  indicesAreAllocated () const
1145  {
1146  return indicesAreAllocated_;
1147  }
1148 
// Returns the indicesAreSorted_ flag. The locally-modified hook in this file
// clears it conservatively (see the FIXME there), so false does not prove the
// indices are actually unsorted.
1149  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1150  bool
1152  isSorted () const
1153  {
1154  return indicesAreSorted_;
1155  }
1156 
// Returns the noRedundancies_ flag. The locally-modified hook in this file
// clears it conservatively (see the FIXME there), so false does not prove
// that duplicate indices actually exist.
1157  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1158  bool
1160  isMerged () const
1161  {
1162  return noRedundancies_;
1163  }
1164 
// Records that the graph's local structure has changed.
// Clears three flags: indicesAreSorted_, noRedundancies_ and
// haveLocalConstants_. It does not inspect the graph, so the first two are
// cleared even when the modification left the indices sorted and unique.
1165  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1166  void
1169  {
1170  // FIXME (mfh 07 May 2013) How do we know that the change
1171  // introduced a redundancy, or even that it invalidated the sorted
1172  // order of indices? CrsGraph has always made this conservative
1173  // guess. It could be a bit costly to check at insertion time,
1174  // though.
1175  indicesAreSorted_ = false;
1176  noRedundancies_ = false;
1177 
1178  // We've modified the graph, so we'll have to recompute local
1179  // constants like the number of diagonal entries on this process.
1180  haveLocalConstants_ = false;
1181  }
1182 
// Allocates the graph's unpacked row offsets and column-index storage.
//
// Parameters:
//   lg      - LocalIndices to allocate lclIndsUnpacked_wdv, GlobalIndices to
//             allocate gblInds_wdv.
//   verbose - when true, progress messages are written to std::cerr.
//
// Steps, in order:
//   1. Check preconditions: lg must not contradict the graph's current
//      indexing, and indices must not already be allocated
//      (std::logic_error otherwise).
//   2. Build row offsets of length numRows + 1, from k_numAllocPerRow_ if it
//      has nonzero length, else from the constant numAllocForAllRows_, and
//      commit them with setRowPtrsUnpacked.
//   3. Allocate the index array with numInds entries.
//   4. Set storageStatus_ to STORAGE_1D_UNPACKED and the local/global flags.
//   5. If numRows > 0, allocate k_numRowEntries_ and fill it with zeros.
//   6. Reset numAllocForAllRows_ and k_numAllocPerRow_, set
//      indicesAreAllocated_, then run checkInternalState().
//
// The input-size checks in step 2 use std::invalid_argument because, per the
// comments there, no side effects have happened yet at that point.
1183  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1184  void
1186  allocateIndices (const ELocalGlobal lg, const bool verbose)
1187  {
1189  using Teuchos::arcp;
1190  using Teuchos::Array;
1191  using Teuchos::ArrayRCP;
1192  using std::endl;
1193  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1194  typedef typename local_graph_device_type::row_map_type::non_const_type
1195  non_const_row_map_type;
1196  const char tfecfFuncName[] = "allocateIndices: ";
1197  const char suffix[] =
1198  " Please report this bug to the Tpetra developers.";
1199  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1200 
  // prefix is only created in verbose mode; every later dereference of it
  // is guarded by the same `verbose` test.
1201  std::unique_ptr<std::string> prefix;
1202  if (verbose) {
1203  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1204  std::ostringstream os;
1205  os << *prefix << "Start: lg="
1206  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1207  << ", numRows: " << this->getLocalNumRows() << endl;
1208  std::cerr << os.str();
1209  }
1210 
1211  // This is a protected function, only callable by us. If it was
1212  // called incorrectly, it is our fault. That's why the tests
1213  // below throw std::logic_error instead of std::invalid_argument.
1214  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1215  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1216  ": The graph is locally indexed, but Tpetra code is calling "
1217  "this method with lg=GlobalIndices." << suffix);
1218  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1219  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1220  ": The graph is globally indexed, but Tpetra code is calling "
1221  "this method with lg=LocalIndices." << suffix);
1222  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1223  (indicesAreAllocated (), std::logic_error, ": The graph's "
1224  "indices are already allocated, but Tpetra is calling "
1225  "allocateIndices again." << suffix);
1226  const size_t numRows = this->getLocalNumRows ();
1227 
1228  //
1229  // STATIC ALLOCATION PROFILE
1230  //
  // numInds receives the total number of index slots, as returned by the
  // offset computation below.
1231  size_type numInds = 0;
1232  {
1233  if (verbose) {
1234  std::ostringstream os;
1235  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1236  std::cerr << os.str();
1237  }
1238  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1239 
  // Per-row allocation counts take precedence over the single constant
  // count whenever they are present (nonzero length).
1240  if (this->k_numAllocPerRow_.extent (0) != 0) {
1241  // It's OK to throw std::invalid_argument here, because we
1242  // haven't incurred any side effects yet. Throwing that
1243  // exception (and not, say, std::logic_error) implies that the
1244  // instance can recover.
1245  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1246  (this->k_numAllocPerRow_.extent (0) != numRows,
1247  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1248  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1249  << ", but its length != numRows = " << numRows << ".");
1250 
1251  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1252  // we want to compute here) lives on device. That's OK;
1253  // computeOffsetsFromCounts can handle this case.
1255 
1256  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1257  // doesn't attempt to check its input for "invalid" flag
1258  // values. For now, we omit that feature of the sequential
1259  // code disabled below.
1260  numInds = computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1261  }
1262  else {
1263  // It's OK to throw std::invalid_argument here, because we
1264  // haven't incurred any side effects yet. Throwing that
1265  // exception (and not, say, std::logic_error) implies that the
1266  // instance can recover.
1267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1268  (this->numAllocForAllRows_ ==
1269  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1270  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1271  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1272  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1273 
1275  numInds = computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1276  }
1277  // "Commit" the resulting row offsets.
1278  setRowPtrsUnpacked(k_rowPtrs);
1279  }
  // Debug-only cross-check: the returned total must equal the last row offset.
1280  if(debug_) {
1281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1282  (numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1283  ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1284  "does not match final entry of rowptrs unpacked");
1285  }
1286 
1287 
  // Allocate exactly one of the two index arrays, chosen by lg.
1288  if (lg == LocalIndices) {
1289  if (verbose) {
1290  std::ostringstream os;
1291  os << *prefix << "Allocate local column indices "
1292  "lclIndsUnpacked_wdv: " << numInds << endl;
1293  std::cerr << os.str();
1294  }
1295  lclIndsUnpacked_wdv = local_inds_wdv_type (
1296  local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1297  }
1298  else {
1299  if (verbose) {
1300  std::ostringstream os;
1301  os << *prefix << "Allocate global column indices "
1302  "gblInds_wdv: " << numInds << endl;
1303  std::cerr << os.str();
1304  }
1305  gblInds_wdv = global_inds_wdv_type (
1306  global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1307  }
1308  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1309 
1310  this->indicesAreLocal_ = (lg == LocalIndices);
1311  this->indicesAreGlobal_ = (lg == GlobalIndices);
1312 
1313  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1314  using Kokkos::ViewAllocateWithoutInitializing;
1315  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1316  if (verbose) {
1317  std::ostringstream os;
1318  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1319  << endl;
1320  std::cerr << os.str();
1321  }
  // Allocated without initialization, so the deep_copy below is what
  // actually zeroes the counts.
1322  num_row_entries_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1323  // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1324  Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1325  Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1326  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1327  }
1328 
1329  // Once indices are allocated, CrsGraph needs to free this information.
1330  this->numAllocForAllRows_ = 0;
1331  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1332  this->indicesAreAllocated_ = true;
1333 
  // Re-report any failure from checkInternalState with a message saying it
  // happened at the end of allocateIndices. Only std::logic_error keeps its
  // type; everything else is reported as std::runtime_error.
1334  try {
1335  this->checkInternalState ();
1336  }
1337  catch (std::logic_error& e) {
1338  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1339  (true, std::logic_error, "At end of allocateIndices, "
1340  "checkInternalState threw std::logic_error: "
1341  << e.what ());
1342  }
1343  catch (std::exception& e) {
1344  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1345  (true, std::runtime_error, "At end of allocateIndices, "
1346  "checkInternalState threw std::exception: "
1347  << e.what ());
1348  }
1349  catch (...) {
1350  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1351  (true, std::runtime_error, "At end of allocateIndices, "
1352  "checkInternalState threw an exception "
1353  "not a subclass of std::exception.");
1354  }
1355 
1356  if (verbose) {
1357  std::ostringstream os;
1358  os << *prefix << "Done" << endl;
1359  std::cerr << os.str();
1360  }
1361  }
1362 
// Returns a read-only host view of one row's slice of the unpacked local
// column indices.
// The slice starts at rowinfo.offset1D and spans rowinfo.allocSize entries
// (the row's allocation, not just its filled entries).
// Returns a default-constructed (empty) view if the row has no allocation or
// lclIndsUnpacked_wdv is empty.
1363  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1364  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1365  local_inds_dualv_type::t_host::const_type
1367  getLocalIndsViewHost (const RowInfo& rowinfo) const
1368  {
1369  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1370  return typename local_inds_dualv_type::t_host::const_type ();
1371  else
1372  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1373  rowinfo.allocSize,
1374  Access::ReadOnly);
1375  }
1376 
// Returns a writable host view of one row's slice of the unpacked local
// column indices, requested with Access::ReadWrite.
// The slice starts at rowinfo.offset1D and spans rowinfo.allocSize entries.
// Returns a default-constructed (empty) view if the row has no allocation or
// lclIndsUnpacked_wdv is empty.
1377  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1379  local_inds_dualv_type::t_host
1382  {
1383  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1384  return typename local_inds_dualv_type::t_host ();
1385  else
1386  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1387  rowinfo.allocSize,
1388  Access::ReadWrite);
1389  }
1390 
// Returns a read-only host view of one row's slice of the global column
// indices.
// The slice starts at rowinfo.offset1D and spans rowinfo.allocSize entries.
// Returns a default-constructed (empty) view if the row has no allocation or
// gblInds_wdv is empty.
1391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1393  global_inds_dualv_type::t_host::const_type
1395  getGlobalIndsViewHost (const RowInfo& rowinfo) const
1396  {
1397  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1398  return typename global_inds_dualv_type::t_host::const_type ();
1399  else
1400  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1401  rowinfo.allocSize,
1402  Access::ReadOnly);
1403  }
1404 
// Returns a read-only device view of one row's slice of the unpacked local
// column indices.
// The slice starts at rowinfo.offset1D and spans rowinfo.allocSize entries.
// Returns a default-constructed (empty) view if the row has no allocation or
// lclIndsUnpacked_wdv is empty.
1405  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1407  local_inds_dualv_type::t_dev::const_type
1409  getLocalIndsViewDevice (const RowInfo& rowinfo) const
1410  {
1411  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1412  return typename local_inds_dualv_type::t_dev::const_type ();
1413  else
1414  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1415  rowinfo.allocSize,
1416  Access::ReadOnly);
1417  }
1418 
// Returns a read-only device view of one row's slice of the global column
// indices.
// The slice starts at rowinfo.offset1D and spans rowinfo.allocSize entries.
// Returns a default-constructed (empty) view if the row has no allocation or
// gblInds_wdv is empty.
1419  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1421  global_inds_dualv_type::t_dev::const_type
1423  getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1424  {
1425  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1426  return typename global_inds_dualv_type::t_dev::const_type ();
1427  else
1428  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1429  rowinfo.allocSize,
1430  Access::ReadOnly);
1431  }
1432 
1433 
// Builds the RowInfo record (localRow, allocSize, numEntries, offset1D) for
// the local row index myRow.
//
// Results:
//   - rowMap_ is null or myRow is not a local element of it: localRow and
//     offset1D are the size_t "invalid" value, allocSize and numEntries are 0.
//   - indices allocated: offset1D and allocSize come from the unpacked host
//     row offsets (both 0 if those offsets are empty). numEntries is
//     k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ has zero
//     length.
//   - indices not yet allocated: allocSize is the requested allocation
//     (per-row if k_numAllocPerRow_ is non-empty, otherwise
//     numAllocForAllRows_), numEntries is 0 and offset1D is invalid.
1434  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1435  RowInfo
1437  getRowInfo (const LocalOrdinal myRow) const
1438  {
1439  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1440  RowInfo ret;
1441  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1442  ret.localRow = STINV;
1443  ret.allocSize = 0;
1444  ret.numEntries = 0;
1445  ret.offset1D = STINV;
1446  return ret;
1447  }
1448 
1449  ret.localRow = static_cast<size_t> (myRow);
1450  if (this->indicesAreAllocated ()) {
1451  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1452  // Offsets tell us the allocation size in this case.
1453  if (rowPtrsUnpacked_host.extent (0) == 0) {
1454  ret.offset1D = 0;
1455  ret.allocSize = 0;
1456  }
1457  else {
1458  ret.offset1D = rowPtrsUnpacked_host(myRow);
1459  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1460  }
1461 
    // With no per-row counts, the row is reported as filling its whole allocation.
1462  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1463  ret.allocSize :
1464  this->k_numRowEntries_(myRow);
1465  }
1466  else { // haven't performed allocation yet; probably won't hit this code
1467  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1468  // allocate, rather than doing lazy allocation at first insert.
1469  // This will make k_numAllocPerRow_ obsolete.
1470  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1471  this->k_numAllocPerRow_(myRow) : // this is a host View
1472  this->numAllocForAllRows_;
1473  ret.numEntries = 0;
1474  ret.offset1D = STINV;
1475  }
1476 
1477  return ret;
1478  }
1479 
1480 
// Builds the RowInfo record for the row whose global index is gblRow.
//
// gblRow is translated with rowMap_->getLocalElement. If rowMap_ is null, or
// the translation yields the LocalOrdinal "invalid" value, the result has
// localRow and offset1D set to the size_t "invalid" value and allocSize and
// numEntries set to 0.
//
// Otherwise the fields are filled from the translated local row:
//   - indices allocated: offset1D and allocSize come from the unpacked host
//     row offsets (both 0 if those offsets are empty). numEntries is
//     k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ has zero
//     length.
//   - indices not yet allocated: allocSize is the requested allocation
//     (per-row if k_numAllocPerRow_ is non-empty, otherwise
//     numAllocForAllRows_), numEntries is 0 and offset1D is invalid.
1481  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1482  RowInfo
1484  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1485  {
1486  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1487  RowInfo ret;
1488  if (this->rowMap_.is_null ()) {
1489  ret.localRow = STINV;
1490  ret.allocSize = 0;
1491  ret.numEntries = 0;
1492  ret.offset1D = STINV;
1493  return ret;
1494  }
1495  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1496  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1497  ret.localRow = STINV;
1498  ret.allocSize = 0;
1499  ret.numEntries = 0;
1500  ret.offset1D = STINV;
1501  return ret;
1502  }
1503 
1504  ret.localRow = static_cast<size_t> (myRow);
1505  if (this->indicesAreAllocated ()) {
1506  // graph data structures have the info that we need
1507  //
1508  // if static graph, offsets tell us the allocation size
1509  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1510  if (rowPtrsUnpacked_host.extent (0) == 0) {
1511  ret.offset1D = 0;
1512  ret.allocSize = 0;
1513  }
1514  else {
1515  ret.offset1D = rowPtrsUnpacked_host(myRow);
1516  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1517  }
1518 
    // With no per-row counts, the row is reported as filling its whole allocation.
1519  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1520  ret.allocSize :
1521  this->k_numRowEntries_(myRow);
1522  }
1523  else { // haven't performed allocation yet; probably won't hit this code
1524  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1525  // allocate, rather than doing lazy allocation at first insert.
1526  // This will make k_numAllocPerRow_ obsolete.
1527  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1528  this->k_numAllocPerRow_(myRow) : // this is a host View
1529  this->numAllocForAllRows_;
1530  ret.numEntries = 0;
1531  ret.offset1D = STINV;
1532  }
1533 
1534  return ret;
1535  }
1536 
1537 
// Checks the size relationships between LocalOrdinal, GlobalOrdinal, size_t
// and global_size_t.
//
// Compile time (static_assert): sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
// sizeof(size_t) >= sizeof(LocalOrdinal), sizeof(global_size_t) >= sizeof(size_t).
//
// Run time: compares the OrdinalTraits<...>::max() values of those types and
// reports std::runtime_error through TEUCHOS_TEST_FOR_EXCEPTION if any
// expected ordering is violated.
1538  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1539  void
1541  staticAssertions () const
1542  {
1543  using Teuchos::OrdinalTraits;
1544  typedef LocalOrdinal LO;
1545  typedef GlobalOrdinal GO;
1546  typedef global_size_t GST;
1547 
1548  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1549  // This is so that we can store local indices in the memory
1550  // formerly occupied by global indices.
1551  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1552  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1553  // Assumption: max(size_t) >= max(LocalOrdinal)
1554  // This is so that we can represent any LocalOrdinal as a size_t.
1555  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1556  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1557  static_assert (sizeof(GST) >= sizeof(size_t),
1558  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1559 
1560  // FIXME (mfh 30 Sep 2015) We're not using
1561  // Teuchos::CompileTimeAssert any more. Can we do these checks
1562  // with static_assert?
1563 
1564  // can't call max() with CompileTimeAssert, because it isn't a
1565  // constant expression; will need to make this a runtime check
1566  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1567  "given template arguments: size assumptions are not valid.";
  // max(LO) must fit in size_t.
1568  TEUCHOS_TEST_FOR_EXCEPTION(
1569  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1570  std::runtime_error, msg);
  // max(LO) must not exceed max(GO), compared as global_size_t.
1571  TEUCHOS_TEST_FOR_EXCEPTION(
1572  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1573  std::runtime_error, msg);
  // max(GO) must fit in global_size_t.
1574  TEUCHOS_TEST_FOR_EXCEPTION(
1575  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1576  std::runtime_error, msg);
  // max(size_t) must fit in global_size_t.
1577  TEUCHOS_TEST_FOR_EXCEPTION(
1578  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1579  std::runtime_error, msg);
1580  }
1581 
1582 
// Appends column indices to the end of one row's current entries.
//
// Parameters visible here:
//   rowinfo - describes the target row; its numEntries field is increased by
//             the number of indices appended.
//   newInds - the input indices; .ginds is read when lg == GlobalIndices and
//             .linds when lg == LocalIndices.
//   lg      - whether the input indices are global or local.
//   I       - whether to store them as global or local indices.
//
// Supported combinations:
//   - global in, global stored: copied into gblInds_wdv.
//   - global in, local stored: each index is converted with
//     colMap_->getLocalElement and written into lclIndsUnpacked_wdv.
//   - local in, local stored: copied into lclIndsUnpacked_wdv.
//   - local in, global stored: not implemented; reports std::logic_error.
//
// Indices are written starting at rowinfo.offset1D + rowinfo.numEntries. This
// function does no duplicate detection. The size check before each write and
// the entry-count check at the end run only when debug_ is set.
//
// Returns the number of indices appended. k_numRowEntries_ for the row is
// increased by the same amount.
1583  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1584  size_t
1587  const SLocalGlobalViews &newInds,
1588  const ELocalGlobal lg,
1589  const ELocalGlobal I)
1590  {
1591  using Teuchos::ArrayView;
1592  typedef LocalOrdinal LO;
1593  typedef GlobalOrdinal GO;
1594  const char tfecfFuncName[] = "insertIndices: ";
1595 
  // oldNumEnt is only captured (and later checked) in debug mode.
1596  size_t oldNumEnt = 0;
1597  if (debug_) {
1598  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1599  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1600  "lg must be either GlobalIndices or LocalIndices.");
1601  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1602  }
1603 
1604  size_t numNewInds = 0;
1605  if (lg == GlobalIndices) { // input indices are global
1606  ArrayView<const GO> new_ginds = newInds.ginds;
1607  numNewInds = new_ginds.size();
1608  if (I == GlobalIndices) { // store global indices
1609  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1610  if (debug_) {
1611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1612  (static_cast<size_t> (gind_view.size ()) <
1613  rowinfo.numEntries + numNewInds, std::logic_error,
1614  "gind_view.size() = " << gind_view.size ()
1615  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1616  << ") + numNewInds (= " << numNewInds << ").");
1617  }
1618  GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1619  + rowinfo.numEntries;
1620  for (size_t k = 0; k < numNewInds; ++k) {
1621  gblColInds_out[k] = new_ginds[k];
1622  }
1623  }
1624  else if (I == LocalIndices) { // store local indices
1625  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1626  if (debug_) {
1627  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1628  (static_cast<size_t> (lind_view.size ()) <
1629  rowinfo.numEntries + numNewInds, std::logic_error,
1630  "lind_view.size() = " << lind_view.size ()
1631  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1632  << ") + numNewInds (= " << numNewInds << ").");
1633  }
1634  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1635  + rowinfo.numEntries;
      // The result of getLocalElement is stored as-is, whatever it returns.
1636  for (size_t k = 0; k < numNewInds; ++k) {
1637  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1638  }
1639  }
1640  }
1641  else if (lg == LocalIndices) { // input indices are local
1642  ArrayView<const LO> new_linds = newInds.linds;
1643  numNewInds = new_linds.size();
1644  if (I == LocalIndices) { // store local indices
1645  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1646  if (debug_) {
1647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1648  (static_cast<size_t> (lind_view.size ()) <
1649  rowinfo.numEntries + numNewInds, std::logic_error,
1650  "lind_view.size() = " << lind_view.size ()
1651  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1652  << ") + numNewInds (= " << numNewInds << ").");
1653  }
1654  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1655  + rowinfo.numEntries;
1656  for (size_t k = 0; k < numNewInds; ++k) {
1657  lclColInds_out[k] = new_linds[k];
1658  }
1659  }
1660  else if (I == GlobalIndices) {
1661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1662  (true, std::logic_error, "The case where the input indices are local "
1663  "and the indices to write are global (lg=LocalIndices, I="
1664  "GlobalIndices) is not implemented, because it does not make sense."
1665  << std::endl << "If you have correct local column indices, that "
1666  "means the graph has a column Map. In that case, you should be "
1667  "storing local indices.");
1668  }
1669  }
1670 
  // Update both the caller's RowInfo and the graph's per-row count, then
  // mark the graph as locally modified.
1671  rowinfo.numEntries += numNewInds;
1672  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1673  this->setLocallyModified ();
1674 
1675  if (debug_) {
1676  const size_t chkNewNumEnt =
1677  this->getNumEntriesInLocalRow (rowinfo.localRow);
1678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1679  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1680  "chkNewNumEnt = " << chkNewNumEnt
1681  << " != oldNumEnt (= " << oldNumEnt
1682  << ") + numNewInds (= " << numNewInds << ").");
1683  }
1684 
1685  return numNewInds;
1686  }
1687 
// Convenience overload taking a local row index.
// Looks up the row's RowInfo with getRowInfo(lclRow) and forwards to the
// RowInfo-based insertGlobalIndicesImpl, returning whatever that returns.
1688  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1689  size_t
1691  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1692  const GlobalOrdinal inputGblColInds[],
1693  const size_t numInputInds)
1694  {
1695  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1696  inputGblColInds, numInputInds);
1697  }
1698 
// Inserts global column indices into the row described by rowInfo, delegating
// the actual insertion to Details::insertCrsIndices.
//
// Parameters visible here:
//   inputGblColInds - array of numInputInds global column indices.
//   numInputInds    - number of input indices.
//   fun             - callback forwarded unchanged to insertCrsIndices.
//
// If insertCrsIndices returns the size_t "invalid" value, the insertion is
// treated as a capacity failure. A message is built listing the row's current
// entry count, its allocation size and the input and current indices, and
// std::runtime_error is reported.
//
// On success k_numRowEntries_(lclRow) is increased by the number inserted and
// the graph is marked as locally modified.
//
// Returns the number of indices inserted, as reported by insertCrsIndices.
1699  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1700  size_t
1703  const GlobalOrdinal inputGblColInds[],
1704  const size_t numInputInds,
1705  std::function<void(const size_t, const size_t, const size_t)> fun)
1706  {
1708  using Kokkos::View;
1709  using Kokkos::subview;
1710  using Kokkos::MemoryUnmanaged;
1711  using Teuchos::ArrayView;
1712  using LO = LocalOrdinal;
1713  using GO = GlobalOrdinal;
1714  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1715  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1716 
1717  auto numEntries = rowInfo.numEntries;
  // Wrap the caller's raw array in an unmanaged host View (no copy, no ownership).
1718  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1719  inp_view_type inputInds(inputGblColInds, numInputInds);
1720  size_t numInserted;
  // The host view is confined to this inner scope, so it is destroyed before
  // the code below runs.
1721  {
1722  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1723  numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1724  gblIndsHostView,
1725  numEntries, inputInds, fun);
1726  }
1727 
1728  const bool insertFailed =
1729  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1730  if(insertFailed) {
1731  constexpr size_t ONE (1);
1732  const int myRank = this->getComm()->getRank();
1733  std::ostringstream os;
1734 
1735  os << "Proc " << myRank << ": Not enough capacity to insert "
1736  << numInputInds
1737  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1738  << " into local row " << lclRow << ", which currently has "
1739  << rowInfo.numEntries
1740  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1741  << " and total allocation size " << rowInfo.allocSize
1742  << ". ";
1743  const size_t maxNumToPrint =
1745  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1746  numInputInds);
1747  verbosePrintArray(os, inputGblColIndsView, "Input global "
1748  "column indices", maxNumToPrint);
1749  os << ", ";
1750  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1751  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1752  rowInfo.numEntries);
1753  verbosePrintArray(os, curGblColIndsView, "Current global "
1754  "column indices", maxNumToPrint);
1755  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1756  (true, std::runtime_error, os.str());
1757  }
1758 
1759  this->k_numRowEntries_(lclRow) += numInserted;
1760 
1761  this->setLocallyModified();
1762  return numInserted;
1763  }
1764 
1765 
1766  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1767  void
1769  insertLocalIndicesImpl (const LocalOrdinal myRow,
1770  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1771  std::function<void(const size_t, const size_t, const size_t)> fun)
1772  {
1773  using Kokkos::MemoryUnmanaged;
1774  using Kokkos::subview;
1775  using Kokkos::View;
1776  using LO = LocalOrdinal;
1777  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1778 
1779  const RowInfo rowInfo = this->getRowInfo(myRow);
1780 
1781  size_t numNewInds = 0;
1782  size_t newNumEntries = 0;
1783 
1784  auto numEntries = rowInfo.numEntries;
1785  // Note: Teuchos::ArrayViews are in HostSpace
1786  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1787  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1788  size_t numInserted = 0;
1789  {
1790  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1791  numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1792  numEntries, inputInds, fun);
1793  }
1794 
1795  const bool insertFailed =
1796  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1797  if(insertFailed) {
1798  constexpr size_t ONE (1);
1799  const size_t numInputInds(indices.size());
1800  const int myRank = this->getComm()->getRank();
1801  std::ostringstream os;
1802  os << "On MPI Process " << myRank << ": Not enough capacity to "
1803  "insert " << numInputInds
1804  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1805  << " into local row " << myRow << ", which currently has "
1806  << rowInfo.numEntries
1807  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1808  << " and total allocation size " << rowInfo.allocSize << ".";
1809  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1810  (true, std::runtime_error, os.str());
1811  }
1812  numNewInds = numInserted;
1813  newNumEntries = rowInfo.numEntries + numNewInds;
1814 
1815  this->k_numRowEntries_(myRow) += numNewInds;
1816  this->setLocallyModified ();
1817 
1818  if (debug_) {
1819  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1820  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1821  (chkNewNumEntries != newNumEntries, std::logic_error,
1822  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1823  << " != newNumEntries = " << newNumEntries
1824  << ". Please report this bug to the Tpetra developers.");
1825  }
1826  }
1827 
// Looks up the given global column indices in the row described by rowInfo,
// delegating the search to Details::findCrsIndices.
//
// Parameters visible here:
//   indices - global column indices to look for.
//   fun     - callback forwarded unchanged to findCrsIndices.
//
// Behavior by indexing state:
//   - locally indexed: each global index is mapped through
//     colMap_->getLocalElement and searched for in lclIndsUnpacked_wdv.
//     With a null colMap_ the size_t "invalid" value is returned at once.
//   - globally indexed: indices are searched for directly in gblInds_wdv.
//   - neither: nothing is searched and 0 is returned.
//
// Returns the value reported by findCrsIndices (stored in numFound).
1828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1829  size_t
1832  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1833  std::function<void(const size_t, const size_t, const size_t)> fun) const
1834  {
1835  using GO = GlobalOrdinal;
1836  using Kokkos::View;
1837  using Kokkos::MemoryUnmanaged;
1838  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1839 
  // Wrap the caller's array in an unmanaged host View (no copy, no ownership).
1840  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1841  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1842 
1843  size_t numFound = 0;
1844  LocalOrdinal lclRow = rowInfo.localRow;
1845  if (this->isLocallyIndexed())
1846  {
1847  if (this->colMap_.is_null())
1848  return invalidCount;
1849  const auto& colMap = *(this->colMap_);
  // Converts each input global index to a local one before comparison.
1850  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1851  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1852  rowInfo.numEntries,
1853  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1854  }
1855  else if (this->isGloballyIndexed())
1856  {
1857  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1858  rowInfo.numEntries,
1859  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1860  }
1861  return numFound;
1862  }
1863 
1864 
// Sort and/or remove duplicate local column indices in one row, in place.
//
// NOTE(review): listing lines 1867-1868 are absent from this extract; they
// would hold the class qualifier, the function name, and the leading
// parameter(s) (the body uses a `rowInfo` not declared in the visible part
// of the signature).
//
// sorted: if false, the row's first rowInfo.numEntries local indices are
//         sorted with std::sort; if true, no sort is performed.
// merged: if false, std::unique is applied and the row's entry count in
//         k_numRowEntries_ is reduced accordingly; if true, nothing is
//         removed.
//
// Returns the number of entries removed by std::unique, or 0 when `merged`
// is true, the row has no entries, or rowInfo.numEntries is the invalid
// size_t value. Since std::unique only collapses adjacent equal values,
// passing sorted == true for a row that is not actually sorted would leave
// non-adjacent duplicates in place.
1865  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1866  size_t
1869  const bool sorted,
1870  const bool merged)
1871  {
1872  const size_t origNumEnt = rowInfo.numEntries;
1873  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1874  origNumEnt != 0) {
1875  auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1876 
1877  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1878  if (! sorted) {
1879  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1880  }
1881 
1882  if (! merged) {
1883  LocalOrdinal* const beg = lclColIndsRaw;
1884  LocalOrdinal* const end = beg + rowInfo.numEntries;
1885  LocalOrdinal* const newend = std::unique (beg, end);
1886  const size_t newNumEnt = newend - beg;
1887 
1888  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1889  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1890  return origNumEnt - newNumEnt; // the number of duplicates in the row
1891  }
1892  else {
1893  return static_cast<size_t> (0); // assume no duplicates
1894  }
1895  }
1896  else {
1897  return static_cast<size_t> (0); // no entries in the row
1898  }
1899  }
1900 
1901 
// Replace the graph's domain and range Maps.
//
// domainMap: new domain Map; stored in domainMap_ if it differs from the
//            current one.
// rangeMap:  new range Map; stored in rangeMap_ if it differs from the
//            current one.
//
// When the domain Map changes, importer_ is reset to null; when the range
// Map changes, exporter_ is reset to null. If a Map compares equal to the
// one already stored, the corresponding member and importer_/exporter_ are
// left untouched.
1902  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1903  void
1905  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1906  const Teuchos::RCP<const map_type>& rangeMap)
1907  {
1908  // simple pointer comparison for equality
1909  if (domainMap_ != domainMap) {
1910  domainMap_ = domainMap;
1911  importer_ = Teuchos::null;
1912  }
1913  if (rangeMap_ != rangeMap) {
1914  rangeMap_ = rangeMap;
1915  exporter_ = Teuchos::null;
1916  }
1917  }
1918 
1919 
// Mark the graph's global constants as not computed.
//
// NOTE(review): listing lines 1922-1923 (class qualifier and function name)
// are absent from this extract.
//
// Sets globalNumEntries_ and globalMaxNumRowEntries_ to the invalid
// global_size_t value and clears haveGlobalConstants_.
1920  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1921  void
1924  {
1925  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1926 
1927  globalNumEntries_ = INV;
1928  globalMaxNumRowEntries_ = INV;
1929  haveGlobalConstants_ = false;
1930  }
1931 
1932 
// Consistency check of the graph's internal state (debug mode only).
//
// NOTE(review): listing lines 1935-1936 (class qualifier and function name)
// are absent from this extract; the name is taken from tfecfFuncName below.
//
// Does nothing unless debug_ is true. When debug_ is true, a sequence of
// invariants on the Maps, fill state, global constants, and the row-pointer
// and index allocations is tested. Each test is passed to
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC with std::logic_error as the
// exception type; failures inside getLocalAllocationSize() are re-reported
// with std::runtime_error. When verbose_ is also true, "Start" and "Done"
// lines are written to std::cerr.
1933  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1934  void
1937  {
1938  if (debug_) {
1939  using std::endl;
1940  const char tfecfFuncName[] = "checkInternalState: ";
1941  const char suffix[] = " Please report this bug to the Tpetra developers.";
1942 
1943  std::unique_ptr<std::string> prefix;
1944  if (verbose_) {
1945  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1946  std::ostringstream os;
1947  os << *prefix << "Start" << endl;
1948  std::cerr << os.str();
1949  }
1950 
1951  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1952  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1953  // check the internal state of this data structure
1954  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1955  // always remains in a valid state
1956 
1957  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958  (this->rowMap_.is_null (), std::logic_error,
1959  "Row Map is null." << suffix);
1960  // This may access the row Map, so we need to check first (above)
1961  // whether the row Map is null.
1962  const LocalOrdinal lclNumRows =
1963  static_cast<LocalOrdinal> (this->getLocalNumRows ());
1964 
      // NOTE(review): this test fires whenever the two flags are equal, i.e.
      // also when the graph is neither fill active nor fill complete; the
      // message only describes the "both true" case.
1965  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1966  (this->isFillActive () == this->isFillComplete (), std::logic_error,
1967  "Graph cannot be both fill active and fill complete." << suffix);
      // NOTE(review): the message below says "full complete"; "fill complete"
      // is presumably intended.
1968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969  (this->isFillComplete () &&
1970  (this->colMap_.is_null () ||
1971  this->rangeMap_.is_null () ||
1972  this->domainMap_.is_null ()),
1973  std::logic_error,
1974  "Graph is full complete, but at least one of {column, range, domain} "
1975  "Map is null." << suffix);
1976  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1977  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1978  std::logic_error, "Storage is optimized, but indices are not "
1979  "allocated, not even trivially." << suffix);
1980 
      // Query the local allocation size; anything it throws is re-reported
      // as std::runtime_error with a message naming what was caught.
1981  size_t nodeAllocSize = 0;
1982  try {
1983  nodeAllocSize = this->getLocalAllocationSize ();
1984  }
1985  catch (std::logic_error& e) {
1986  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1987  (true, std::runtime_error, "getLocalAllocationSize threw "
1988  "std::logic_error: " << e.what ());
1989  }
1990  catch (std::exception& e) {
1991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992  (true, std::runtime_error, "getLocalAllocationSize threw an "
1993  "std::exception: " << e.what ());
1994  }
1995  catch (...) {
1996  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1997  (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1998  "not a subclass of std::exception.");
1999  }
2000 
2001  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2002  (this->isStorageOptimized () &&
2003  nodeAllocSize != this->getLocalNumEntries (),
2004  std::logic_error, "Storage is optimized, but "
2005  "this->getLocalAllocationSize() = " << nodeAllocSize
2006  << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
2007  << "." << suffix);
      // Global constants: either all valid (and flagged as present) or all
      // invalid (and flagged as absent), and never smaller than the local
      // values.
2008  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2009  (! this->haveGlobalConstants_ &&
2010  (this->globalNumEntries_ != GSTI ||
2011  this->globalMaxNumRowEntries_ != GSTI),
2012  std::logic_error, "Graph claims not to have global constants, but "
2013  "some of the global constants are not marked as invalid." << suffix);
2014  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2015  (this->haveGlobalConstants_ &&
2016  (this->globalNumEntries_ == GSTI ||
2017  this->globalMaxNumRowEntries_ == GSTI),
2018  std::logic_error, "Graph claims to have global constants, but "
2019  "some of them are marked as invalid." << suffix);
2020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2021  (this->haveGlobalConstants_ &&
2022  (this->globalNumEntries_ < this->getLocalNumEntries () ||
2023  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2024  std::logic_error, "Graph claims to have global constants, and "
2025  "all of the values of the global constants are valid, but "
2026  "some of the local constants are greater than "
2027  "their corresponding global constants." << suffix);
2028  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2029  (this->indicesAreAllocated () &&
2030  (this->numAllocForAllRows_ != 0 ||
2031  this->k_numAllocPerRow_.extent (0) != 0),
2032  std::logic_error, "The graph claims that its indices are allocated, but "
2033  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2034  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2035  "the graph is supposed to release its \"allocation specifications\" "
2036  "when it allocates its indices." << suffix);
      // Row-pointer checks: host and device copies must have the same length,
      // and a nonempty row-pointer array must have lclNumRows+1 entries whose
      // last entry equals the length of the index array in use.
2037  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
2038  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
2039  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2040  (rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
2041  std::logic_error, "The host and device views of k_rowPtrs_ have "
2042  "different sizes; rowPtrsUnpacked_host_ has size "
2043  << rowPtrsUnpacked_host.extent(0)
2044  << ", but rowPtrsUnpacked_dev_ has size "
2045  << rowPtrsUnpacked_dev.extent(0)
2046  << "." << suffix);
2047  if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
2048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2049  (size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
2050  std::logic_error, "The graph is globally indexed and "
2051  "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host.extent(0)
2052  << ", but that size does not equal lclNumRows+1 = "
2053  << (lclNumRows+1) << "." << suffix);
2054  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055  (rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2056  std::logic_error, "The graph is globally indexed and "
2057  "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host.extent(0)
2058  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2059  << rowPtrsUnpacked_host(lclNumRows)
2060  << " != gblInds_wdv.extent(0)="
2061  << gblInds_wdv.extent(0) << "." << suffix);
2062  }
2063  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2064  (this->isLocallyIndexed () &&
2065  rowPtrsUnpacked_host.extent (0) != 0 &&
2066  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2067  static_cast<size_t> (lclNumRows + 1) ||
2068  rowPtrsUnpacked_host(lclNumRows) !=
2069  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2070  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2071  "the graph is locally indexed, then "
2072  "k_rowPtrs_ must have N+1 rows, and "
2073  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2074 
      // NOTE(review): the two adjacent literals below concatenate to
      // "... nontrivially, but but 1-D allocations ..." (duplicated "but").
2075  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076  (this->indicesAreAllocated () &&
2077  nodeAllocSize > 0 &&
2078  this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2079  this->gblInds_wdv.extent (0) == 0,
2080  std::logic_error, "Graph is allocated nontrivially, but "
2081  "but 1-D allocations are not present." << suffix);
2082 
2083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2084  (! this->indicesAreAllocated () &&
2085  ((rowPtrsUnpacked_host.extent (0) != 0 ||
2086  this->k_numRowEntries_.extent (0) != 0) ||
2087  this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2088  this->gblInds_wdv.extent (0) != 0),
2089  std::logic_error, "If indices are not allocated, "
2090  "then none of the buffers should be." << suffix);
2091  // indices may be local or global only if they are allocated
2092  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2093  // indicesAreGlobal_)
2094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2096  ! this->indicesAreAllocated_,
2097  std::logic_error, "Indices may be local or global only if they are "
2098  "allocated." << suffix);
2099  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2100  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2101  std::logic_error, "Indices may not be both local and global." << suffix);
2102  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2103  (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2104  std::logic_error, "Indices are local, but "
2105  "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2106  << ") != 0. In other words, if indices are local, then "
2107  "allocations of global indices should not be present."
2108  << suffix);
2109  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2110  (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2111  std::logic_error, "Indices are global, but "
2112  "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2113  << ") != 0. In other words, if indices are global, "
2114  "then allocations for local indices should not be present."
2115  << suffix);
2116  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117  (indicesAreLocal_ && nodeAllocSize > 0 &&
2118  lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2119  std::logic_error, "Indices are local and "
2120  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2121  "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2122  << getLocalNumRows () << " > 0." << suffix);
2123  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2124  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2125  gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2126  std::logic_error, "Indices are global and "
2127  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2128  "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2129  << getLocalNumRows () << " > 0." << suffix);
2130  // check the actual allocations
2131  if (this->indicesAreAllocated () &&
2132  rowPtrsUnpacked_host.extent (0) != 0) {
2133  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2134  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2135  this->getLocalNumRows () + 1,
2136  std::logic_error, "Indices are allocated and "
2137  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2138  << rowPtrsUnpacked_host.extent (0) << " != getLocalNumRows()+1 = "
2139  << (this->getLocalNumRows () + 1) << "." << suffix);
      // The last row pointer is the total number of allocated index slots.
2140  const size_t actualNumAllocated =
2141  rowPtrsUnpacked_host(this->getLocalNumRows());
      // NOTE(review): the two adjacent literals below concatenate to
      // "... indices are are allocated ..." (duplicated "are").
2142  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2143  (this->isLocallyIndexed () &&
2144  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2145  std::logic_error, "Graph is locally indexed, indices are "
2146  "are allocated, and k_rowPtrs_ has nonzero length, but "
2147  "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2148  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2149  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2150  (this->isGloballyIndexed () &&
2151  static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2152  std::logic_error, "Graph is globally indexed, indices "
2153  "are allocated, and k_rowPtrs_ has nonzero length, but "
2154  "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2155  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2156  }
2157 
2158  if (verbose_) {
2159  std::ostringstream os;
2160  os << *prefix << "Done" << endl;
2161  std::cerr << os.str();
2162  }
2163  }
2164  }
2165 
2166 
// Number of entries currently stored in the row with the given global index.
//
// globalRow: global row index.
//
// Returns rowInfo.numEntries for the row, or
// Teuchos::OrdinalTraits<size_t>::invalid() if
// getRowInfoFromGlobalRowIndex() reports an invalid local row for it.
2167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2168  size_t
2170  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2171  {
2172  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2173  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2174  return Teuchos::OrdinalTraits<size_t>::invalid ();
2175  }
2176  else {
2177  return rowInfo.numEntries;
2178  }
2179  }
2180 
2181 
// Number of entries currently stored in the row with the given local index.
//
// localRow: local row index.
//
// Returns rowInfo.numEntries for the row, or
// Teuchos::OrdinalTraits<size_t>::invalid() if getRowInfo() reports an
// invalid local row for it.
2182  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2183  size_t
2185  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2186  {
2187  const RowInfo rowInfo = this->getRowInfo (localRow);
2188  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2189  return Teuchos::OrdinalTraits<size_t>::invalid ();
2190  }
2191  else {
2192  return rowInfo.numEntries;
2193  }
2194  }
2195 
2196 
// Allocated capacity of the row with the given global index.
//
// globalRow: global row index.
//
// Returns rowInfo.allocSize for the row, or
// Teuchos::OrdinalTraits<size_t>::invalid() if
// getRowInfoFromGlobalRowIndex() reports an invalid local row for it.
2197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2198  size_t
2200  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2201  {
2202  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2203  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2204  return Teuchos::OrdinalTraits<size_t>::invalid ();
2205  }
2206  else {
2207  return rowInfo.allocSize;
2208  }
2209  }
2210 
2211 
// Allocated capacity of the row with the given local index.
//
// localRow: local row index.
//
// Returns rowInfo.allocSize for the row, or
// Teuchos::OrdinalTraits<size_t>::invalid() if getRowInfo() reports an
// invalid local row for it.
2212  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2213  size_t
2215  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2216  {
2217  const RowInfo rowInfo = this->getRowInfo (localRow);
2218  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2219  return Teuchos::OrdinalTraits<size_t>::invalid ();
2220  }
2221  else {
2222  return rowInfo.allocSize;
2223  }
2224  }
2225 
2226 
// Accessor returning the packed row pointers as a host view; simply forwards
// to getRowPtrsPackedHost().
//
// NOTE(review): listing lines 2229-2230 (class qualifier and function name)
// are absent from this extract.
2227  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2228  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2231  {
2232  return getRowPtrsPackedHost();
2233  }
2234 
// Accessor returning the packed row pointers as a device view; simply
// forwards to getRowPtrsPackedDevice().
//
// NOTE(review): listing lines 2237-2238 (class qualifier and function name)
// are absent from this extract.
2235  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2236  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2239  {
2240  return getRowPtrsPackedDevice();
2241  }
2242 
2243 
// Accessor returning a read-only host view of the packed local column
// indices (lclIndsPacked_wdv).
//
// NOTE(review): listing lines 2246-2247 (class qualifier and function name)
// are absent from this extract.
2244  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2245  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2248  {
2249  return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2250  }
2251 
// Accessor returning a read-only device view of the packed local column
// indices (lclIndsPacked_wdv).
//
// NOTE(review): listing lines 2253-2255 (return type, class qualifier, and
// function name) are absent from this extract.
2252  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2256  {
2257  return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2258  }
2259 
// Copy the column indices of one local row into caller-provided storage,
// expressed as local column indices.
//
// localRow:   local index of the row to copy.
// indices:    output view; must hold at least as many entries as the row.
// numEntries: output; set to the row's number of entries. It is written
//             only after the size check on `indices` has passed.
//
// If the graph is globally indexed, each stored global index is converted
// with colMap_->getLocalElement(). If getRowInfo() reports an invalid local
// row, nothing is copied.
//
// The exception type given to the checks is std::runtime_error: when the
// graph is globally indexed and has no column Map, and when `indices` is
// too small for the row.
2260  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2261  void
2263  getLocalRowCopy (LocalOrdinal localRow,
2264  nonconst_local_inds_host_view_type & indices,
2265  size_t& numEntries) const
2266  {
2267  using Teuchos::ArrayView;
2268  const char tfecfFuncName[] = "getLocalRowCopy: ";
2269 
2270  TEUCHOS_TEST_FOR_EXCEPTION(
2271  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2272  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2273  "does not have a column Map yet. That means we don't have local indices "
2274  "for columns yet, so it doesn't make sense to call this method. If the "
2275  "graph doesn't have a column Map yet, you should call fillComplete on "
2276  "it first.");
2277 
2278  // This does the right thing (reports an empty row) if the input
2279  // row is invalid.
2280  const RowInfo rowinfo = this->getRowInfo (localRow);
2281  // No side effects on error.
2282  const size_t theNumEntries = rowinfo.numEntries;
2283  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2284  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2285  "Specified storage (size==" << indices.size () << ") does not suffice "
2286  "to hold all " << theNumEntries << " entry/ies for this row.");
2287  numEntries = theNumEntries;
2288 
2289  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2290  if (isLocallyIndexed ()) {
      // Stored indices are already local: plain element-wise copy.
2291  auto lclInds = getLocalIndsViewHost(rowinfo);
2292  for (size_t j = 0; j < theNumEntries; ++j) {
2293  indices[j] = lclInds(j);
2294  }
2295  }
2296  else if (isGloballyIndexed ()) {
      // Stored indices are global: convert each one through the column Map.
2297  auto gblInds = getGlobalIndsViewHost(rowinfo);
2298  for (size_t j = 0; j < theNumEntries; ++j) {
2299  indices[j] = colMap_->getLocalElement (gblInds(j));
2300  }
2301  }
2302  }
2303  }
2304 
2305 
// Copy the column indices of one row, identified by its global row index,
// into caller-provided storage, expressed as global column indices.
//
// globalRow:  global index of the row to copy.
// indices:    output view; must hold at least as many entries as the row.
// numEntries: output; set to the row's number of entries. It is written
//             only after the size check on `indices` has passed.
//
// If the graph is locally indexed, each stored local index is converted
// with colMap_->getGlobalElement(). If getRowInfoFromGlobalRowIndex()
// reports an invalid local row, nothing is copied.
//
// The exception type given to the size check is std::runtime_error.
2306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2307  void
2309  getGlobalRowCopy (GlobalOrdinal globalRow,
2310  nonconst_global_inds_host_view_type &indices,
2311  size_t& numEntries) const
2312  {
2313  using Teuchos::ArrayView;
2314  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2315 
2316  // This does the right thing (reports an empty row) if the input
2317  // row is invalid.
2318  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2319  const size_t theNumEntries = rowinfo.numEntries;
2320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2321  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2322  "Specified storage (size==" << indices.size () << ") does not suffice "
2323  "to hold all " << theNumEntries << " entry/ies for this row.");
2324  numEntries = theNumEntries; // first side effect
2325 
2326  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2327  if (isLocallyIndexed ()) {
      // Stored indices are local: convert each one through the column Map.
2328  auto lclInds = getLocalIndsViewHost(rowinfo);
2329  for (size_t j = 0; j < theNumEntries; ++j) {
2330  indices[j] = colMap_->getGlobalElement (lclInds(j));
2331  }
2332  }
2333  else if (isGloballyIndexed ()) {
      // Stored indices are already global: plain element-wise copy.
2334  auto gblInds = getGlobalIndsViewHost(rowinfo);
2335  for (size_t j = 0; j < theNumEntries; ++j) {
2336  indices[j] = gblInds(j);
2337  }
2338  }
2339  }
2340  }
2341 
2342 
// Get a read-only host view of one row's local column indices.
//
// NOTE(review): listing lines 2345-2346 (class qualifier and function name)
// are absent from this extract; the name is taken from tfecfFuncName below.
//
// localRow: local index of the row.
// indices:  output; set to a read-only host subview of lclIndsUnpacked_wdv
//           starting at rowInfo.offset1D with rowInfo.numEntries entries,
//           or to a default-constructed (empty) view if the row is invalid
//           or has no entries.
//
// The exception type given to the "globally indexed" check is
// std::runtime_error. When debug_ is true, the view's size is also checked
// against getNumEntriesInLocalRow(localRow) (std::logic_error).
2343  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2344  void
2347  const LocalOrdinal localRow,
2348  local_inds_host_view_type &indices) const
2349  {
2350  const char tfecfFuncName[] = "getLocalRowView: ";
2351 
2352  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2353  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2354  "currently stored as global indices, so we cannot return a view with "
2355  "local column indices, whether or not the graph has a column Map. If "
2356  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2357 
2358  const RowInfo rowInfo = getRowInfo (localRow);
2359  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2360  rowInfo.numEntries > 0) {
2361  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2362  rowInfo.numEntries,
2363  Access::ReadOnly);
2364  }
2365  else {
2366  // This does the right thing (reports an empty row) if the input
2367  // row is invalid.
2368  indices = local_inds_host_view_type();
2369  }
2370 
2371  if (debug_) {
2372  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2373  (static_cast<size_t> (indices.size ()) !=
2374  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2375  "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2376  localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2377  ". Please report this bug to the Tpetra developers.");
2378  }
2379  }
2380 
2381 
// Get a read-only host view of one row's global column indices.
//
// NOTE(review): listing lines 2384-2385 (class qualifier and function name)
// are absent from this extract; the name is taken from tfecfFuncName below.
//
// globalRow: global index of the row.
// indices:   output; set to a read-only host subview of gblInds_wdv starting
//            at rowInfo.offset1D with rowInfo.numEntries entries, or to a
//            default-constructed (empty) view if the row is invalid or has
//            no entries.
//
// The exception type given to the "locally indexed" check is
// std::runtime_error. When debug_ is true, the view's size is also checked
// against getNumEntriesInGlobalRow(globalRow) (std::logic_error).
2382  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2383  void
2386  const GlobalOrdinal globalRow,
2387  global_inds_host_view_type &indices) const
2388  {
2389  const char tfecfFuncName[] = "getGlobalRowView: ";
2390 
2391  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2392  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2393  "currently stored as local indices, so we cannot return a view with "
2394  "global column indices. Use getGlobalRowCopy() instead.");
2395 
2396  // This does the right thing (reports an empty row) if the input
2397  // row is invalid.
2398  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2399  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2400  rowInfo.numEntries > 0) {
2401  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2402  rowInfo.numEntries,
2403  Access::ReadOnly);
2404  }
2405  else {
2406  indices = typename global_inds_dualv_type::t_host::const_type();
2407  }
2408  if (debug_) {
2409  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2410  (static_cast<size_t> (indices.size ()) !=
2411  getNumEntriesInGlobalRow (globalRow),
2412  std::logic_error, "indices.size() = " << indices.extent(0)
2413  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2414  << getNumEntriesInGlobalRow (globalRow)
2415  << ". Please report this bug to the Tpetra developers.");
2416  }
2417  }
2418 
2419 
// Insert local column indices into a locally owned row.
//
// localRow: local index of the row; must be in the row Map on the calling
//           process.
// indices:  local column indices to insert.
//
// Preconditions, each checked with std::runtime_error as the exception
// type: fill is active, the graph is not globally indexed, the graph has a
// column Map, and localRow is in the row Map. If indices are not yet
// allocated, they are allocated as local indices first. The insertion
// itself is delegated to insertLocalIndicesImpl().
//
// When debug_ is true, every input index is additionally checked against
// the column Map, and std::invalid_argument is the exception type used if
// any is missing; the message lists all offending indices.
2420  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2421  void
2423  insertLocalIndices (const LocalOrdinal localRow,
2424  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2425  {
2426  const char tfecfFuncName[] = "insertLocalIndices: ";
2427 
2428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2429  (! isFillActive (), std::runtime_error, "Fill must be active.");
2430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2431  (isGloballyIndexed (), std::runtime_error,
2432  "Graph indices are global; use insertGlobalIndices().");
2433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2434  (! hasColMap (), std::runtime_error,
2435  "Cannot insert local indices without a column Map.");
2436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2437  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2438  "Local row index " << localRow << " is not in the row Map "
2439  "on the calling process.");
2440  if (! indicesAreAllocated ()) {
2441  allocateIndices (LocalIndices, verbose_);
2442  }
2443 
2444  if (debug_) {
2445  // In debug mode, if the graph has a column Map, test whether any
2446  // of the given column indices are not in the column Map. Keep
2447  // track of the invalid column indices so we can tell the user
2448  // about them.
2449  if (hasColMap ()) {
2450  using Teuchos::Array;
2451  using Teuchos::toString;
2452  using std::endl;
2453  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2454 
2455  const map_type& colMap = *colMap_;
2456  Array<LocalOrdinal> badColInds;
2457  bool allInColMap = true;
2458  for (size_type k = 0; k < indices.size (); ++k) {
2459  if (! colMap.isNodeLocalElement (indices[k])) {
2460  allInColMap = false;
2461  badColInds.push_back (indices[k]);
2462  }
2463  }
2464  if (! allInColMap) {
2465  std::ostringstream os;
2466  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2467  "entries in owned row " << localRow << ", at the following column "
2468  "indices: " << toString (indices) << "." << endl;
2469  os << "Of those, the following indices are not in the column Map on "
2470  "this process: " << toString (badColInds) << "." << endl << "Since "
2471  "the graph has a column Map already, it is invalid to insert entries "
2472  "at those locations.";
2473  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2474  }
2475  }
2476  }
2477 
2478  insertLocalIndicesImpl (localRow, indices);
2479 
2480  if (debug_) {
2481  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2482  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2483  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2484  "! isLocallyIndexed() is true. Please report this bug to the "
2485  "Tpetra developers.");
2486  }
2487  }
2488 
// Raw-array overload of insertLocalIndices.
//
// localRow: local index of the row.
// numEnt:   number of entries in `inds`.
// inds:     local column indices to insert.
//
// Wraps (inds, numEnt) in a non-owning Teuchos::ArrayView and forwards to
// the ArrayView overload.
2489  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2490  void
2492  insertLocalIndices (const LocalOrdinal localRow,
2493  const LocalOrdinal numEnt,
2494  const LocalOrdinal inds[])
2495  {
2496  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2497  this->insertLocalIndices (localRow, indsT);
2498  }
2499 
2500 
// Insert global column indices into the row with the given global index.
//
// gblRow:          global index of the row.
// numInputInds:    number of entries in inputGblColInds.
// inputGblColInds: global column indices to insert.
//
// Preconditions, each checked with std::runtime_error as the exception
// type: the graph is not locally indexed, and fill is active. If indices
// are not yet allocated, they are allocated as global indices first.
//
// If gblRow maps to a valid local row in rowMap_, the indices are inserted
// via insertGlobalIndicesImpl(); otherwise they are handed to
// insertGlobalIndicesIntoNonownedRows().
//
// When debug_ is true, the row is owned, and the graph has a column Map,
// every input index is checked against the column Map, and
// std::invalid_argument is the exception type used if any is missing.
2501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2502  void
2504  insertGlobalIndices (const GlobalOrdinal gblRow,
2505  const LocalOrdinal numInputInds,
2506  const GlobalOrdinal inputGblColInds[])
2507  {
2508  typedef LocalOrdinal LO;
2509  const char tfecfFuncName[] = "insertGlobalIndices: ";
2510 
2511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2512  (this->isLocallyIndexed (), std::runtime_error,
2513  "graph indices are local; use insertLocalIndices().");
2514  // This can't really be satisfied for now, because if we are
2515  // fillComplete(), then we are local. In the future, this may
2516  // change. However, the rule that modification require active
2517  // fill will not change.
2518  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2519  (! this->isFillActive (), std::runtime_error,
2520  "You are not allowed to call this method if fill is not active. "
2521  "If fillComplete has been called, you must first call resumeFill "
2522  "before you may insert indices.");
2523  if (! indicesAreAllocated ()) {
2524  allocateIndices (GlobalIndices, verbose_);
2525  }
2526  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2527  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2528  if (debug_) {
2529  if (this->hasColMap ()) {
2530  using std::endl;
2531  const map_type& colMap = * (this->colMap_);
2532  // Collect the column indices that are not in the column Map
2533  // ("bad" indices), so that we can display them in the exception
2534  // message. This whole check only runs when debug_ is true; when
2535  // it is false, this function does not perform the check itself.
2536  std::vector<GlobalOrdinal> badColInds;
2537  bool allInColMap = true;
2538  for (LO k = 0; k < numInputInds; ++k) {
2539  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2540  allInColMap = false;
2541  badColInds.push_back (inputGblColInds[k]);
2542  }
2543  }
2544  if (! allInColMap) {
      // Build a message listing all input indices, then the bad ones.
      // NOTE(review): the message text below says "matrix" although this
      // is the graph class; "graph" is presumably intended.
2545  std::ostringstream os;
2546  os << "You attempted to insert entries in owned row " << gblRow
2547  << ", at the following column indices: [";
2548  for (LO k = 0; k < numInputInds; ++k) {
2549  os << inputGblColInds[k];
2550  if (k + static_cast<LO> (1) < numInputInds) {
2551  os << ",";
2552  }
2553  }
2554  os << "]." << endl << "Of those, the following indices are not in "
2555  "the column Map on this process: [";
2556  for (size_t k = 0; k < badColInds.size (); ++k) {
2557  os << badColInds[k];
2558  if (k + size_t (1) < badColInds.size ()) {
2559  os << ",";
2560  }
2561  }
2562  os << "]." << endl << "Since the matrix has a column Map already, "
2563  "it is invalid to insert entries at those locations.";
2564  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2565  (true, std::invalid_argument, os.str ());
2566  }
2567  }
2568  } // debug_
2569  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2570  }
2571  else { // a nonlocal row
2572  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2573  numInputInds);
2574  }
2575  }
2576 
2577 
// ArrayView overload of insertGlobalIndices.
//
// gblRow:          global index of the row.
// inputGblColInds: global column indices to insert.
//
// Forwards the view's size and raw pointer to the raw-array overload.
2578  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2579  void
2581  insertGlobalIndices (const GlobalOrdinal gblRow,
2582  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2583  {
2584  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2585  inputGblColInds.getRawPtr ());
2586  }
2587 
2588 
// Insert global column indices into a local row, silently dropping those
// that the column Map does not have on the calling process.
//
// lclRow:        local index of the row.
// gblColInds:    global column indices to insert.
// numGblColInds: number of entries in gblColInds.
//
// Preconditions, each checked with std::runtime_error as the exception
// type: the graph is not locally indexed, and fill is active. If indices
// are not yet allocated, they are allocated as global indices first.
//
// If colMap_ is null, no filtering happens and all indices are passed to
// insertGlobalIndicesImpl() in one call. Otherwise the input is scanned for
// maximal runs of consecutive indices that are in the column Map, and each
// nonempty run is inserted with one insertGlobalIndicesImpl() call.
2589  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2590  void
2592  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2593  const GlobalOrdinal gblColInds[],
2594  const LocalOrdinal numGblColInds)
2595  {
2596  typedef LocalOrdinal LO;
2597  typedef GlobalOrdinal GO;
2598  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2599 
2600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2601  (this->isLocallyIndexed (), std::runtime_error,
2602  "Graph indices are local; use insertLocalIndices().");
2603  // This can't really be satisfied for now, because if we are
2604  // fillComplete(), then we are local. In the future, this may
2605  // change. However, the rule that modification require active
2606  // fill will not change.
2607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2608  (! this->isFillActive (), std::runtime_error,
2609  "You are not allowed to call this method if fill is not active. "
2610  "If fillComplete has been called, you must first call resumeFill "
2611  "before you may insert indices.");
2612  if (! indicesAreAllocated ()) {
2613  allocateIndices (GlobalIndices, verbose_);
2614  }
2615 
2616  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2617  // If we have a column Map, use it to filter the entries.
2618  if (! colMap_.is_null ()) {
2619  const map_type& colMap = * (this->colMap_);
2620 
2621  LO curOffset = 0;
2622  while (curOffset < numGblColInds) {
2623  // Find a sequence of input indices that are in the column Map
2624  // on the calling process. Doing a sequence at a time,
2625  // instead of one at a time, amortizes some overhead.
2626  LO endOffset = curOffset;
2627  for ( ; endOffset < numGblColInds; ++endOffset) {
2628  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2629  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2630  break; // first entry, in current sequence, not in the column Map
2631  }
2632  }
2633  // curOffset, endOffset: half-exclusive range of indices in
2634  // the column Map on the calling process. If endOffset ==
2635  // curOffset, the range is empty.
2636  const LO numIndInSeq = (endOffset - curOffset);
2637  if (numIndInSeq != 0) {
2638  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2639  numIndInSeq);
2640  }
2641  // Invariant before this line: Either endOffset ==
2642  // numGblColInds, or gblColInds[endOffset] is not in the
2643  // column Map on the calling process.
      // Either way, resume one past endOffset: this skips the rejected
      // index, or pushes curOffset past the end so the loop terminates.
2644  curOffset = endOffset + 1;
2645  }
2646  }
2647  else {
2648  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2649  gblColInds_av.size ());
2650  }
2651  }
2652 
// Record global column indices destined for a row that is handled as
// "nonowned" by the caller.
//
// gblRow:        global index of the row.
// gblColInds:    global column indices to record.
// numGblColInds: number of entries in gblColInds.
//
// The indices are appended, in order and without removing duplicates, to
// the vector stored under key gblRow in nonlocals_.
2653  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2654  void
2656  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2657  const GlobalOrdinal gblColInds[],
2658  const LocalOrdinal numGblColInds)
2659  {
2660  // This creates the std::vector if it doesn't exist yet.
2661  // std::map's operator[] does a lookup each time, so it's better
2662  // to pull nonlocals_[gblRow] out of the loop.
2663  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2664  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2665  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2666  // order to avoid duplicates. globalAssemble() sorts these
2667  // anyway.
2668  nonlocalRow.push_back (gblColInds[k]);
2669  }
2670  }
2671 
// Remove all entries from one local row.
//
// lrow: local index of the row; must be in the row Map on the calling
//       process.
//
// Preconditions, each checked with std::runtime_error as the exception
// type: fill is active, storage is not optimized, the graph is not globally
// indexed, and lrow is in the row Map. If indices are not yet allocated,
// they are allocated as local indices first.
//
// The row is emptied by setting its count in k_numRowEntries_ to zero (when
// that array is nonempty); the index storage itself is not modified here.
// When debug_ is true, the post-conditions are re-checked
// (std::logic_error).
2672  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2673  void
2675  removeLocalIndices (LocalOrdinal lrow)
2676  {
2677  const char tfecfFuncName[] = "removeLocalIndices: ";
2678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2679  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2680  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2681  isStorageOptimized (), std::runtime_error,
2682  "cannot remove indices after optimizeStorage() has been called.");
2683  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2684  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2686  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2687  "Local row " << lrow << " is not in the row Map on the calling process.");
2688  if (! indicesAreAllocated ()) {
2689  allocateIndices (LocalIndices, verbose_);
2690  }
2691 
2692  if (k_numRowEntries_.extent (0) != 0) {
2693  this->k_numRowEntries_(lrow) = 0;
2694  }
2695 
2696  if (debug_) {
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2698  (getNumEntriesInLocalRow (lrow) != 0 ||
2699  ! indicesAreAllocated () ||
2700  ! isLocallyIndexed (), std::logic_error,
2701  "Violated stated post-conditions. Please contact Tpetra team.");
2702  }
2703  }
2704 
2705 
// Install caller-provided compressed-sparse-row arrays as this graph's
// storage (Kokkos::View overload).
//
// rowPointers:   row offsets. Its length must be getLocalNumRows()+1,
//                except that with zero local rows a length of 0 or 1
//                is accepted.
// columnIndices: local column indices for all rows.
//
// Raises std::runtime_error if the graph has no column Map or if
// rowPointers has the wrong length.
//
// When debug_ is set, two extra checks run. Each one does an
// all-reduce over the graph's communicator, so every process throws
// std::invalid_argument together if any process fails:
//   * every column index lies in [0, getLocalNumCols());
//   * if isSorted() is true on entry, indices are nondecreasing within
//     each row.
//
// After the checks, the graph is marked allocated, locally indexed,
// sorted, and free of redundancies, and its storage status becomes
// STORAGE_1D_PACKED. Note that the sorted / no-redundancies flags are
// set unconditionally: this method never checks for duplicates, and it
// checks sortedness only under the debug condition above.
2706  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2707  void
2709  setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2710  const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2711  {
2712  using ProfilingRegion=Details::ProfilingRegion;
2713  ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2714  const char tfecfFuncName[] = "setAllIndices: ";
2715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2716  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2717  "The graph must have a column Map before you may call this method.");
2718  LocalOrdinal numLocalRows = this->getLocalNumRows ();
      // Validate the length of the row offsets array.
2719  {
2720  LocalOrdinal rowPtrLen = rowPointers.size();
2721  if(numLocalRows == 0) {
2722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2723  rowPtrLen != 0 && rowPtrLen != 1,
2724  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2725  }
2726  else {
2727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2728  rowPtrLen != numLocalRows + 1,
2729  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2730  " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2731  }
2732  }
2733 
      // Debug check 1: count column indices outside
      // [0, getLocalNumCols()) on this process, then take the max over
      // all processes so that every process takes the same branch.
2734  if(debug_) {
2735  using exec_space = typename local_graph_device_type::execution_space;
2736  int columnsOutOfBounds = 0;
2737  local_ordinal_type numLocalCols = this->getLocalNumCols();
2738  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2739  KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2740  {
2741  if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2742  lOutOfBounds++;
2743  }, columnsOutOfBounds);
2744  int globalColsOutOfBounds= 0;
2745  auto comm = this->getComm();
2746  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2747  Teuchos::outArg (globalColsOutOfBounds));
2748  if (globalColsOutOfBounds)
2749  {
2750  std::string message;
2751  if (columnsOutOfBounds)
2752  {
2753  //Only print message from ranks with the problem
2754  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2755  }
      // Called on every process (ranks without the problem pass an
      // empty message), then all processes throw.
2756  Details::gathervPrint(std::cout, message, *comm);
2757  throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2758  }
2759  }
2760 
      // Debug check 2: runs only if the graph already reports itself as
      // sorted on entry.
2761  if (debug_ && this->isSorted()) {
2762  // Verify that the local indices are actually sorted
2763  int notSorted = 0;
2764  using exec_space = typename local_graph_device_type::execution_space;
2765  using size_type = typename local_graph_device_type::size_type;
2766  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2767  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2768  {
2769  size_type rowBegin = rowPointers(i);
2770  size_type rowEnd = rowPointers(i + 1);
      // Compare each entry of row i with its predecessor; equal
      // neighbors are accepted (only a strict decrease is flagged).
2771  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2772  {
2773  if(columnIndices(j - 1) > columnIndices(j))
2774  {
2775  lNotSorted = 1;
2776  }
2777  }
2778  }, notSorted);
2779  //All-reduce notSorted to avoid rank divergence
2780  int globalNotSorted = 0;
2781  auto comm = this->getComm();
2782  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2783  Teuchos::outArg (globalNotSorted));
2784  if (globalNotSorted)
2785  {
2786  std::string message;
2787  if (notSorted)
2788  {
2789  //Only print message from ranks with the problem
2790  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2791  }
2792  Details::gathervPrint(std::cout, message, *comm);
2793  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2794  }
2795  }
2796 
      // Commit the new state. The sorted / no-redundancies flags are
      // set without further verification.
2797  indicesAreAllocated_ = true;
2798  indicesAreLocal_ = true;
2799  indicesAreSorted_ = true;
2800  noRedundancies_ = true;
      // Both the packed and the unpacked index containers are set from
      // the same wrapped columnIndices.
2801  lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2802  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2803  setRowPtrs(rowPointers);
2804 
2805  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2806 
2807  // Storage MUST be packed, since the interface doesn't give any
2808  // way to indicate any extra space at the end of each row.
2809  storageStatus_ = Details::STORAGE_1D_PACKED;
2810 
2811  // These normally get cleared out at the end of allocateIndices.
2812  // It makes sense to clear them out here, because at the end of
2813  // this method, the graph is allocated on the calling process.
2814  numAllocForAllRows_ = 0;
2815  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2816 
2817  checkInternalState ();
2818  }
2819 
2820 
// Install caller-provided compressed-sparse-row arrays as this graph's
// storage (Teuchos::ArrayRCP overload).
//
// rowPointers:   row offsets, stored as size_t.
// columnIndices: local column indices for all rows.
//
// Both inputs are copied into Kokkos::Views, and the work is then
// forwarded to the View overload of setAllIndices, which performs all
// validation. The row offsets are copied into a freshly allocated
// row_map_type::non_const_type View, with a conversion from size_t to
// the graph's row offset type when the two types differ.
2821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2822  void
2824  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2825  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2826  {
2827  using Kokkos::View;
2828  typedef typename local_graph_device_type::row_map_type row_map_type;
2829  typedef typename row_map_type::array_layout layout_type;
2830  typedef typename row_map_type::non_const_value_type row_offset_type;
2831  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2832  Kokkos::MemoryUnmanaged> input_view_type;
2833  typedef typename row_map_type::non_const_type nc_row_map_type;
2834 
2835  const size_t size = static_cast<size_t> (rowPointers.size ());
2836  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
      // Unmanaged host View over the caller's array (no copy made here).
2837  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2838 
      // Destination for the row offsets, in the graph's row offset type.
2839  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2840 
2841  if constexpr (same) { // size_t == row_offset_type
2842  using lexecution_space = typename device_type::execution_space;
2843  Kokkos::deep_copy (lexecution_space(),
2844  ptr_rot,
2845  ptr_in);
2846  }
2847  else { // size_t != row_offset_type
2848  // CudaUvmSpace != HostSpace, so this will be false in that case.
2849  constexpr bool inHostMemory =
2850  std::is_same<typename row_map_type::memory_space,
2851  Kokkos::HostSpace>::value;
2852  if (inHostMemory) {
2853  // Copy (with cast from size_t to row_offset_type, with bounds
2854  // checking if necessary) to ptr_rot.
2855  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2856  }
2857  else { // Copy input row offsets to device first.
2858  //
2859  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2860  // execution space would avoid the double copy.
2861  //
2862  View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2863 
2864  // DEEP_COPY REVIEW - NOT TESTED
2865  Kokkos::deep_copy (ptr_st, ptr_in);
2866  // Copy on device (casting from size_t to row_offset_type,
2867  // with bounds checking if necessary) to ptr_rot. This
2868  // executes in the output View's execution space, which is the
2869  // same as execution_space.
2870  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2871  }
2872  }
2873 
      // Copy the column indices into a View on device_type, then hand
      // both Views to the View overload.
2874  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2875  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2876  setAllIndices (ptr_rot, k_ind);
2877  }
2878 
2879 
// globalAssemble: move the entries stashed in nonlocals_ into the rows
// of this graph as laid out by its row Map.
//
// Outline (matching the numbered comments in the body):
//   1. Sort and deduplicate the stashed column indices of each row, and
//      record the per-row counts.
//   2. Build a row Map over the stashed rows.
//   3. Build a temporary CrsGraph over that Map holding the stashed
//      entries.
//   4. Export that graph into *this (directly if the row Map is
//      one-to-one, otherwise via an intermediate one-to-one graph
//      followed by an Import), using Tpetra::INSERT.
// Finally nonlocals_ is emptied.
//
// The method uses all-reduces over the graph's communicator. It
// returns early, without building anything, when no process has
// stashed rows. Raises std::runtime_error if fill is not active.
2880  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2881  void
2884  {
2885  using Teuchos::Comm;
2886  using Teuchos::outArg;
2887  using Teuchos::RCP;
2888  using Teuchos::rcp;
2889  using Teuchos::REDUCE_MAX;
2890  using Teuchos::REDUCE_MIN;
2891  using Teuchos::reduceAll;
2892  using std::endl;
2893  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2894  using LO = local_ordinal_type;
2895  using GO = global_ordinal_type;
2896  using size_type = typename Teuchos::Array<GO>::size_type;
2897  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2898 
      // prefix is only created (and only dereferenced) when verbose_
      // is set.
2899  std::unique_ptr<std::string> prefix;
2900  if (verbose_) {
2901  prefix = this->createPrefix("CrsGraph", "globalAssemble");
2902  std::ostringstream os;
2903  os << *prefix << "Start" << endl;
2904  std::cerr << os.str();
2905  }
2906  RCP<const Comm<int> > comm = getComm ();
2907 
2908  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2909  (! isFillActive (), std::runtime_error, "Fill must be active before "
2910  "you may call this method.");
2911 
2912  const size_t myNumNonlocalRows = this->nonlocals_.size ();
2913 
2914  // If no processes have nonlocal rows, then we don't have to do
2915  // anything. Checking this is probably cheaper than constructing
2916  // the Map of nonlocal rows (see below) and noticing that it has
2917  // zero global entries.
2918  {
2919  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2920  int someoneHasNonlocalRows = 0;
2921  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2922  outArg (someoneHasNonlocalRows));
2923  if (someoneHasNonlocalRows == 0) {
2924  if (verbose_) {
2925  std::ostringstream os;
2926  os << *prefix << "Done: No nonlocal rows" << endl;
2927  std::cerr << os.str();
2928  }
2929  return;
2930  }
2931  else if (verbose_) {
2932  std::ostringstream os;
2933  os << *prefix << "At least 1 process has nonlocal rows"
2934  << endl;
2935  std::cerr << os.str();
2936  }
2937  }
2938 
2939  // 1. Create a list of the "nonlocal" rows on each process. This
2940  // requires iterating over nonlocals_, so while we do this,
2941  // deduplicate the entries and get a count for each nonlocal
2942  // row on this process.
2943  // 2. Construct a new row Map corresponding to those rows. This
2944  // Map is likely overlapping. We know that the Map is not
2945  // empty on all processes, because the above all-reduce and
2946  // return exclude that case.
2947 
2948  RCP<const map_type> nonlocalRowMap;
2949  // Keep this for CrsGraph's constructor.
2950  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2951  {
2952  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2953  size_type curPos = 0;
2954  for (auto mapIter = this->nonlocals_.begin ();
2955  mapIter != this->nonlocals_.end ();
2956  ++mapIter, ++curPos) {
2957  myNonlocalGblRows[curPos] = mapIter->first;
2958  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
      // Sort, then drop adjacent duplicates; the stashed vector in
      // nonlocals_ itself is shortened.
2959  std::sort (gblCols.begin (), gblCols.end ());
2960  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2961  gblCols.erase (vecLast, gblCols.end ());
2962  numEntPerNonlocalRow[curPos] = gblCols.size ();
2963  }
2964 
2965  // Currently, Map requires that its indexBase be the global min
2966  // of all its global indices. Map won't compute this for us, so
2967  // we must do it. If our process has no nonlocal rows, set the
2968  // "min" to the max possible GO value. This ensures that if
2969  // some process has at least one nonlocal row, then it will pick
2970  // that up as the min. We know that at least one process has a
2971  // nonlocal row, since the all-reduce and return at the top of
2972  // this method excluded that case.
2973  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2974  {
2975  auto iter = std::min_element (myNonlocalGblRows.begin (),
2976  myNonlocalGblRows.end ());
2977  if (iter != myNonlocalGblRows.end ()) {
2978  myMinNonlocalGblRow = *iter;
2979  }
2980  }
2981  GO gblMinNonlocalGblRow = 0;
2982  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2983  outArg (gblMinNonlocalGblRow));
2984  const GO indexBase = gblMinNonlocalGblRow;
2985  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2986  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
2987  }
2988 
2989  if (verbose_) {
2990  std::ostringstream os;
2991  os << *prefix << "nonlocalRowMap->getIndexBase()="
2992  << nonlocalRowMap->getIndexBase() << endl;
2993  std::cerr << os.str();
2994  }
2995 
2996  // 3. Use the column indices for each nonlocal row, as stored in
2997  // nonlocals_, to construct a CrsGraph corresponding to
2998  // nonlocal rows. We need, and we have, exact counts of the
2999  // number of entries in each nonlocal row.
3000 
3001  RCP<crs_graph_type> nonlocalGraph =
3002  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
3003  {
      // This loop visits nonlocals_ in the same order as the loop
      // above, so curPos indexes the matching per-row count.
3004  size_type curPos = 0;
3005  for (auto mapIter = this->nonlocals_.begin ();
3006  mapIter != this->nonlocals_.end ();
3007  ++mapIter, ++curPos) {
3008  const GO gblRow = mapIter->first;
3009  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3010  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3011  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3012  }
3013  }
3014  if (verbose_) {
3015  std::ostringstream os;
3016  os << *prefix << "Built nonlocal graph" << endl;
3017  std::cerr << os.str();
3018  }
3019  // There's no need to fill-complete the nonlocals graph.
3020  // We just use it as a temporary container for the Export.
3021 
3022  // 4. If the original row Map is one to one, then we can Export
3023  // directly from nonlocalGraph into this. Otherwise, we have
3024  // to create a temporary graph with a one-to-one row Map,
3025  // Export into that, then Import from the temporary graph into
3026  // *this.
3027 
3028  auto origRowMap = this->getRowMap ();
3029  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3030 
3031  if (origRowMapIsOneToOne) {
3032  if (verbose_) {
3033  std::ostringstream os;
3034  os << *prefix << "Original row Map is 1-to-1" << endl;
3035  std::cerr << os.str();
3036  }
3037  export_type exportToOrig (nonlocalRowMap, origRowMap);
3038  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3039  // We're done at this point!
3040  }
3041  else {
3042  if (verbose_) {
3043  std::ostringstream os;
3044  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3045  std::cerr << os.str();
3046  }
3047  // If you ask a Map whether it is one to one, it does some
3048  // communication and stashes intermediate results for later use
3049  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3050  // much more than the original cost of calling isOneToOne.
3051  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3052  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3053 
3054  // Create a temporary graph with the one-to-one row Map.
3055  //
3056  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3057  // row, to avoid reallocation during the Export operation.
3058  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3059 
3060  // Export from graph of nonlocals into the temp one-to-one graph.
3061  if (verbose_) {
3062  std::ostringstream os;
3063  os << *prefix << "Export nonlocal graph" << endl;
3064  std::cerr << os.str();
3065  }
3066  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3067 
3068  // We don't need the graph of nonlocals anymore, so get rid of
3069  // it, to keep the memory high-water mark down.
3070  nonlocalGraph = Teuchos::null;
3071 
3072  // Import from the one-to-one graph to the original graph.
3073  import_type importToOrig (oneToOneRowMap, origRowMap);
3074  if (verbose_) {
3075  std::ostringstream os;
3076  os << *prefix << "Import nonlocal graph" << endl;
3077  std::cerr << os.str();
3078  }
3079  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3080  }
3081 
3082  // It's safe now to clear out nonlocals_, since we've already
3083  // committed side effects to *this. The standard idiom for
3084  // clearing a Container like std::map, is to swap it with an empty
3085  // Container and let the swapped Container fall out of scope.
3086  decltype (this->nonlocals_) newNonlocals;
3087  std::swap (this->nonlocals_, newNonlocals);
3088 
3089  checkInternalState ();
3090  if (verbose_) {
3091  std::ostringstream os;
3092  os << *prefix << "Done" << endl;
3093  std::cerr << os.str();
3094  }
3095  }
3096 
3097 
// Put the graph back into the fill-active state.
//
// params: optional parameter list; if nonnull it is handed to
//         setParameterList.
//
// Calls clearGlobalConstants(), then marks the graph as sorted, as
// having no redundancies, and as not fill complete. The sorted and
// no-redundancies flags are set unconditionally here, whatever their
// values were on entry.
3098  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3099  void
3101  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3102  {
3103  clearGlobalConstants();
3104  if (params != Teuchos::null) this->setParameterList (params);
3105  // either still sorted/merged or initially sorted/merged
3106  indicesAreSorted_ = true;
3107  noRedundancies_ = true;
3108  fillComplete_ = false;
3109  }
3110 
3111 
// Convenience overload of fillComplete that chooses the domain and
// range Maps itself, then forwards to the three-argument overload.
//
// params: passed through unchanged to the three-argument overload.
//
// Each of the domain Map and the range Map is taken from the graph if
// it is already set (nonnull); otherwise the row Map is used for it.
3112  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3113  void
3115  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3116  {
3117  // If the graph already has domain and range Maps, don't clobber
3118  // them. If it doesn't, use the current row Map for both the
3119  // domain and range Maps.
3120  //
3121  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3122  // column Map, and column indices are inserted which are not in
3123  // the row Map on any process, this will cause troubles. However,
3124  // that is not a common case for most applications that we
3125  // encounter, and checking for it might require more
3126  // communication.
3127  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3128  if (domMap.is_null ()) {
3129  domMap = this->getRowMap ();
3130  }
3131  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3132  if (ranMap.is_null ()) {
3133  ranMap = this->getRowMap ();
3134  }
3135  this->fillComplete (domMap, ranMap, params);
3136  }
3137 
3138 
3139  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3140  void
3142  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3143  const Teuchos::RCP<const map_type>& rangeMap,
3144  const Teuchos::RCP<Teuchos::ParameterList>& params)
3145  {
3146  using std::endl;
3147  const char tfecfFuncName[] = "fillComplete: ";
3148  const bool verbose = verbose_;
3149 
3150  std::unique_ptr<std::string> prefix;
3151  if (verbose) {
3152  prefix = this->createPrefix("CrsGraph", "fillComplete");
3153  std::ostringstream os;
3154  os << *prefix << "Start" << endl;
3155  std::cerr << os.str();
3156  }
3157 
3158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3159  (! isFillActive () || isFillComplete (), std::runtime_error,
3160  "Graph fill state must be active (isFillActive() "
3161  "must be true) before calling fillComplete().");
3162 
3163  const int numProcs = getComm ()->getSize ();
3164 
3165  //
3166  // Read and set parameters
3167  //
3168 
3169  // Does the caller want to sort remote GIDs (within those owned by
3170  // the same process) in makeColMap()?
3171  if (! params.is_null ()) {
3172  if (params->isParameter ("sort column map ghost gids")) {
3173  sortGhostsAssociatedWithEachProcessor_ =
3174  params->get<bool> ("sort column map ghost gids",
3175  sortGhostsAssociatedWithEachProcessor_);
3176  }
3177  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3178  sortGhostsAssociatedWithEachProcessor_ =
3179  params->get<bool> ("Sort column Map ghost GIDs",
3180  sortGhostsAssociatedWithEachProcessor_);
3181  }
3182  }
3183 
3184  // If true, the caller promises that no process did nonlocal
3185  // changes since the last call to fillComplete.
3186  bool assertNoNonlocalInserts = false;
3187  if (! params.is_null ()) {
3188  assertNoNonlocalInserts =
3189  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3190  }
3191 
3192  //
3193  // Allocate indices, if they haven't already been allocated
3194  //
3195  if (! indicesAreAllocated ()) {
3196  if (hasColMap ()) {
3197  // We have a column Map, so use local indices.
3198  allocateIndices (LocalIndices, verbose);
3199  } else {
3200  // We don't have a column Map, so use global indices.
3201  allocateIndices (GlobalIndices, verbose);
3202  }
3203  }
3204 
3205  //
3206  // Do global assembly, if requested and if the communicator
3207  // contains more than one process.
3208  //
3209  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3210  if (mayNeedGlobalAssemble) {
3211  // This first checks if we need to do global assembly.
3212  // The check costs a single all-reduce.
3213  globalAssemble ();
3214  }
3215  else {
3216  const size_t numNonlocals = nonlocals_.size();
3217  if (verbose) {
3218  std::ostringstream os;
3219  os << *prefix << "Do not need to call globalAssemble; "
3220  "assertNoNonlocalInserts="
3221  << (assertNoNonlocalInserts ? "true" : "false")
3222  << "numProcs=" << numProcs
3223  << ", nonlocals_.size()=" << numNonlocals << endl;
3224  std::cerr << os.str();
3225  }
3226  const int lclNeededGlobalAssemble =
3227  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3228  if (lclNeededGlobalAssemble != 0 && verbose) {
3229  std::ostringstream os;
3230  os << *prefix;
3231  Details::Impl::verbosePrintMap(
3232  os, nonlocals_.begin(), nonlocals_.end(),
3233  nonlocals_.size(), "nonlocals_");
3234  std::cerr << os.str() << endl;
3235  }
3236 
3237  if (debug_) {
3238  auto map = this->getMap();
3239  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3240  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3241  if (! comm.is_null()) {
3242  using Teuchos::REDUCE_MAX;
3243  using Teuchos::reduceAll;
3244  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3245  Teuchos::outArg(gblNeededGlobalAssemble));
3246  }
3247  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3248  (gblNeededGlobalAssemble != 0, std::runtime_error,
3249  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3250  "least one process in the CrsGraph's communicator. This "
3251  "means either that you incorrectly set the "
3252  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3253  "or that you inserted invalid entries. "
3254  "Rerun with the environment variable TPETRA_VERBOSE="
3255  "CrsGraph set to see the entries of nonlocals_ on every "
3256  "MPI process (WARNING: lots of output).");
3257  }
3258  else {
3259  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3260  (lclNeededGlobalAssemble != 0, std::runtime_error,
3261  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3262  "calling process. This means either that you incorrectly "
3263  "set the \"No Nonlocal Changes\" fillComplete parameter "
3264  "to true, or that you inserted invalid entries. "
3265  "Rerun with the environment "
3266  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3267  "of nonlocals_ on every MPI process (WARNING: lots of "
3268  "output).");
3269  }
3270  }
3271 
3272  // Set domain and range Map. This may clear the Import / Export
3273  // objects if the new Maps differ from any old ones.
3274  setDomainRangeMaps (domainMap, rangeMap);
3275 
3276  // If the graph does not already have a column Map (either from
3277  // the user constructor calling the version of the constructor
3278  // that takes a column Map, or from a previous fillComplete call),
3279  // then create it.
3280  Teuchos::Array<int> remotePIDs (0);
3281  const bool mustBuildColMap = ! this->hasColMap ();
3282  if (mustBuildColMap) {
3283  this->makeColMap (remotePIDs); // resized on output
3284  }
3285 
3286  // Make indices local, if they aren't already.
3287  // The method doesn't do any work if the indices are already local.
3288  const std::pair<size_t, std::string> makeIndicesLocalResult =
3289  this->makeIndicesLocal(verbose);
3290 
3291  if (debug_) {
3292  using Details::gathervPrint;
3293  using Teuchos::RCP;
3294  using Teuchos::REDUCE_MIN;
3295  using Teuchos::reduceAll;
3296  using Teuchos::outArg;
3297 
3298  RCP<const map_type> map = this->getMap ();
3299  RCP<const Teuchos::Comm<int> > comm;
3300  if (! map.is_null ()) {
3301  comm = map->getComm ();
3302  }
3303  if (comm.is_null ()) {
3304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3305  (makeIndicesLocalResult.first != 0, std::runtime_error,
3306  makeIndicesLocalResult.second);
3307  }
3308  else {
3309  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3310  int gblSuccess = 0; // output argument
3311  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3312  if (gblSuccess != 1) {
3313  std::ostringstream os;
3314  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3316  (true, std::runtime_error, os.str ());
3317  }
3318  }
3319  }
3320  else {
3321  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3322  // the error state to makeImportExport or
3323  // computeGlobalConstants, which may do all-reduces and thus may
3324  // have the opportunity to communicate that error state.
3325  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3326  (makeIndicesLocalResult.first != 0, std::runtime_error,
3327  makeIndicesLocalResult.second);
3328  }
3329 
3330  // If this process has no indices, then CrsGraph considers it
3331  // already trivially sorted and merged. Thus, this method need
3332  // not be called on all processes in the row Map's communicator.
3333  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3334 
3335  // Make Import and Export objects, if they haven't been made
3336  // already. If we made a column Map above, reuse information from
3337  // that process to avoid communiation in the Import setup.
3338  this->makeImportExport (remotePIDs, mustBuildColMap);
3339 
3340  // Create the KokkosSparse::StaticCrsGraph, if it doesn't already exist.
3341  this->fillLocalGraph (params);
3342 
3343  const bool callComputeGlobalConstants = params.get () == nullptr ||
3344  params->get ("compute global constants", true);
3345  if (callComputeGlobalConstants) {
3346  this->computeGlobalConstants ();
3347  }
3348  else {
3349  this->computeLocalConstants ();
3350  }
3351  this->fillComplete_ = true;
3352  this->checkInternalState ();
3353 
3354  if (verbose) {
3355  std::ostringstream os;
3356  os << *prefix << "Done" << endl;
3357  std::cerr << os.str();
3358  }
3359  }
3360 
3361 
// Fill-complete a graph whose row offsets and local column indices
// are already in place, skipping the steps the ordinary fillComplete
// does first (see the note in the body).
//
// domainMap, rangeMap: must be nonnull; handed to setDomainRangeMaps.
// importer: optional. If nonnull, its source Map must be the same as
//           the domain Map and its target Map the same as the column
//           Map; it is then stored as importer_.
// exporter: optional. If nonnull, its source Map must be the same as
//           the row Map and its target Map the same as the range Map;
//           it is then stored as exporter_.
// params:   optional. "Timer Label" is read only when
//           HAVE_TPETRA_MMM_TIMINGS is defined; "compute global
//           constants" (default true) selects computeGlobalConstants()
//           versus computeLocalConstants(); the list is also forwarded
//           to fillLocalGraph.
//
// Raises std::runtime_error if a Map argument is null, if the graph is
// already fill complete or has no column Map, or if the row offsets
// array does not have length getLocalNumRows()+1. Raises
// std::invalid_argument if a supplied importer or exporter does not
// match the graph's Maps.
//
// This method marks the indices as allocated, local, sorted, and free
// of redundancies without verifying any of that.
3362  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3363  void
3365  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3366  const Teuchos::RCP<const map_type>& rangeMap,
3367  const Teuchos::RCP<const import_type>& importer,
3368  const Teuchos::RCP<const export_type>& exporter,
3369  const Teuchos::RCP<Teuchos::ParameterList>& params)
3370  {
3371  const char tfecfFuncName[] = "expertStaticFillComplete: ";
      // Optional timers: MM is re-pointed at a new named timer before
      // each phase below.
3372 #ifdef HAVE_TPETRA_MMM_TIMINGS
3373  std::string label;
3374  if(!params.is_null())
3375  label = params->get("Timer Label",label);
3376  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3377  using Teuchos::TimeMonitor;
3378  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3379 #endif
3380 
3381 
3382  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3383  domainMap.is_null () || rangeMap.is_null (),
3384  std::runtime_error, "The input domain Map and range Map must be nonnull.");
      // NOTE(review): this test also fires when the graph is already
      // fill complete, although the message mentions only the column
      // Map.
3385  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3386  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3387  "call this method unless the graph has a column Map.");
3388  auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedHost().extent (0);
3389  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3390  getLocalNumRows () > 0 && rowPtrsUnpackedLength == 0,
3391  std::runtime_error, "The calling process has getLocalNumRows() = "
3392  << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3393  "been set.");
3394  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3395  static_cast<size_t> (rowPtrsUnpackedLength) != getLocalNumRows () + 1,
3396  std::runtime_error, "The row offsets array has length " <<
3397  rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " <<
3398  (getLocalNumRows () + 1) << ".");
3399 
3400  // Note: We don't need to do the following things which are normally done in fillComplete:
3401  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3402 
3403  // Constants from allocateIndices
3404  //
3405  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3406  // away once the graph is allocated. expertStaticFillComplete
3407  // either presumes that the graph is allocated, or "allocates" it.
3408  //
3409  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3410  // version of CrsGraph is to allocate in the constructor, not
3411  // lazily on first insert. That will make both
3412  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3413  numAllocForAllRows_ = 0;
3414  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3415  indicesAreAllocated_ = true;
3416 
3417  // Constants from makeIndicesLocal
3418  //
3419  // The graph has a column Map, so its indices had better be local.
3420  indicesAreLocal_ = true;
3421  indicesAreGlobal_ = false;
3422 
3423  // set domain/range map: may clear the import/export objects
3424 #ifdef HAVE_TPETRA_MMM_TIMINGS
3425  MM = Teuchos::null;
3426  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3427 #endif
3428  setDomainRangeMaps (domainMap, rangeMap);
3429 
3430  // Presume the user sorted and merged the arrays first
3431  indicesAreSorted_ = true;
3432  noRedundancies_ = true;
3433 
3434  // makeImportExport won't create a new importer/exporter if I set one here first.
3435 #ifdef HAVE_TPETRA_MMM_TIMINGS
3436  MM = Teuchos::null;
3437  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3438 #endif
3439 
      // Drop any existing Import / Export, then adopt the caller's
      // objects if they are supplied and match the graph's Maps.
3440  importer_ = Teuchos::null;
3441  exporter_ = Teuchos::null;
3442  if (importer != Teuchos::null) {
3443  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3444  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3445  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3446  std::invalid_argument,": importer does not match matrix maps.");
3447  importer_ = importer;
3448 
3449  }
3450 
3451 #ifdef HAVE_TPETRA_MMM_TIMINGS
3452  MM = Teuchos::null;
3453  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3454 #endif
3455 
3456  if (exporter != Teuchos::null) {
3457  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3458  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3459  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3460  std::invalid_argument,": exporter does not match matrix maps.");
3461  exporter_ = exporter;
3462  }
3463 
3464 #ifdef HAVE_TPETRA_MMM_TIMINGS
3465  MM = Teuchos::null;
3466  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3467 #endif
3468  Teuchos::Array<int> remotePIDs (0); // unused output argument
3469  this->makeImportExport (remotePIDs, false);
3470 
3471 #ifdef HAVE_TPETRA_MMM_TIMINGS
3472  MM = Teuchos::null;
3473  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3474 #endif
3475  this->fillLocalGraph (params);
3476 
3477  const bool callComputeGlobalConstants = params.get () == nullptr ||
3478  params->get ("compute global constants", true);
3479 
3480  if (callComputeGlobalConstants) {
3481 #ifdef HAVE_TPETRA_MMM_TIMINGS
3482  MM = Teuchos::null;
3483  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3484 #endif // HAVE_TPETRA_MMM_TIMINGS
3485  this->computeGlobalConstants ();
3486  }
3487  else {
3488 #ifdef HAVE_TPETRA_MMM_TIMINGS
3489  MM = Teuchos::null;
3490  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3491 #endif // HAVE_TPETRA_MMM_TIMINGS
3492  this->computeLocalConstants ();
3493  }
3494 
3495  fillComplete_ = true;
3496 
3497 #ifdef HAVE_TPETRA_MMM_TIMINGS
3498  MM = Teuchos::null;
3499  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3500 #endif
3501  checkInternalState ();
3502  }
3503 
3504 
// fillLocalGraph: set the graph's packed compressed-sparse-row arrays
// (packed row offsets and lclIndsPacked_wdv) from its current unpacked
// 1-D storage.
//
// \param params  May be null.  If nonnull and its "Optimize Storage"
//   entry is false, the unpacked storage and k_numRowEntries_ are kept;
//   otherwise the unpacked indices are replaced by the packed ones,
//   k_numRowEntries_ is released, and storageStatus_ becomes
//   Details::STORAGE_1D_PACKED.
//
// If getLocalNumEntries() differs from getLocalAllocationSize(), new
// packed offsets are computed from k_numRowEntries_ and the column
// indices are copied by a pack_functor kernel; otherwise the unpacked
// arrays are reused as the packed arrays.
//
// Errors are reported through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC:
// std::logic_error for the debug_-only consistency checks, and
// std::logic_error / std::runtime_error when getLocalAllocationSize()
// throws.
3505  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3506  void
3508  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3509  {
3511  typedef typename local_graph_device_type::row_map_type row_map_type;
3512  typedef typename row_map_type::non_const_type non_const_row_map_type;
3513  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3514  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3515  "expertStaticFillComplete): ";
3516  const size_t lclNumRows = this->getLocalNumRows ();
3517 
3518  // This method's goal is to fill in the two arrays (compressed
3519  // sparse row format) that define the sparse graph's structure.
3520 
3521  bool requestOptimizedStorage = true;
3522  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3523  requestOptimizedStorage = false;
3524  }
3525 
3526  // The graph's column indices are currently stored in a 1-D
3527  // format, with row offsets in the unpacked row offsets array and
3528  // local column indices in lclIndsUnpacked_wdv.
3529 
3530  if (debug_) {
3531  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3532  // The graph's array of row offsets must already be allocated.
3533  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3534  (rowPtrsUnpacked.extent (0) == 0, std::logic_error,
3535  "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3536  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3537  (rowPtrsUnpacked.extent (0) != lclNumRows + 1, std::logic_error,
3538  "rowPtrsUnpacked_host_.extent(0) = "
3539  << rowPtrsUnpacked.extent (0) << " != (lclNumRows + 1) = "
3540  << (lclNumRows + 1) << ".");
      // The last unpacked row offset must equal the length of the
      // unpacked column index array.
      // NOTE(review): the message below prints numOffsets as the index,
      // but the value being checked was read at index numOffsets-1.
3541  const size_t numOffsets = rowPtrsUnpacked.extent (0);
3542  const auto valToCheck = rowPtrsUnpacked(numOffsets-1);
3543  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3544  (numOffsets != 0 &&
3545  lclIndsUnpacked_wdv.extent (0) != valToCheck,
3546  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3547  " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3548  << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck
3549  << ".");
3550  }
3551 
      // Query the allocation size.  Anything thrown by
      // getLocalAllocationSize() is caught and re-reported as
      // std::logic_error (if it was one) or std::runtime_error, with a
      // message that names getLocalAllocationSize as the origin.
3552  size_t allocSize = 0;
3553  try {
3554  allocSize = this->getLocalAllocationSize ();
3555  }
3556  catch (std::logic_error& e) {
3557  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3558  (true, std::logic_error, "getLocalAllocationSize threw "
3559  "std::logic_error: " << e.what ());
3560  }
3561  catch (std::runtime_error& e) {
3562  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3563  (true, std::runtime_error, "getLocalAllocationSize threw "
3564  "std::runtime_error: " << e.what ());
3565  }
3566  catch (std::exception& e) {
3567  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3568  (true, std::runtime_error, "getLocalAllocationSize threw "
3569  "std::exception: " << e.what ());
3570  }
3571  catch (...) {
3572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3573  (true, std::runtime_error, "getLocalAllocationSize threw "
3574  "an exception not a subclass of std::exception.");
3575  }
3576 
      // Packing is needed only when the number of valid entries differs
      // from the allocated size.
3577  if (this->getLocalNumEntries () != allocSize) {
3578  // Use the nonconst version of row_map_type for ptr_d, because
3579  // row_map_type itself is const and we need to modify ptr_d here.
3580  non_const_row_map_type ptr_d;
3581  row_map_type ptr_d_const;
3582 
3583  // The graph's current 1-D storage is "unpacked." This means
3584  // the row offsets may differ from what the final row offsets
3585  // should be. This could happen, for example, if the user set
3586  // an upper bound on the number of entries in each row, but
3587  // didn't fill all those entries.
3588 
3589  if (debug_) {
3590  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3591  if (rowPtrsUnpacked.extent (0) != 0) {
3592  const size_t numOffsets =
3593  static_cast<size_t> (rowPtrsUnpacked.extent (0));
3594  const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3596  (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3597  std::logic_error, "(Unpacked branch) Before allocating "
3598  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3599  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3600  << lclIndsUnpacked_wdv.extent (0) << ".");
3601  }
3602  }
3603 
3604  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3605  // array of valid entry counts per row (k_numRowEntries_).
3606 
3607  // Total number of entries in the graph on the calling
3608  // process. We will compute this in the block below. It's
3609  // cheap to compute and useful as a sanity check.
3610  size_t lclTotalNumEntries = 0;
3611  {
3612  // Allocate the packed row offsets array.
3613  ptr_d =
3614  non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3615  ptr_d_const = ptr_d;
3616 
3617  // It's ok that k_numRowEntries_ is a host View; the
3618  // function can handle this.
3619  typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3620  if (debug_) {
3621  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3622  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3623  std::logic_error, "(Unpacked branch) "
3624  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3625  << " != getLocalNumRows()=" << lclNumRows << "");
3626  }
3627 
3628  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3629 
3630  if (debug_) {
3631  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3632  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3633  std::logic_error, "(Unpacked branch) After allocating "
3634  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3635  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3636  const auto valToCheck =
3637  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3638  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3639  (valToCheck != lclTotalNumEntries, std::logic_error,
3640  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3641  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3642  << ") = " << valToCheck << " != total number of entries "
3643  "on the calling process = " << lclTotalNumEntries
3644  << ".");
3645  }
3646  }
3647 
3648  // Allocate the array of packed column indices.
3649  lclinds_1d_type ind_d =
3650  lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3651 
3652  // The row offsets and lclIndsUnpacked_wdv are currently unpacked.
3653  // Pack them, using the packed row offsets array ptr_d that we
3654  // created above.
3655  //
3656  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3657  // CrsMatrix?), we need to keep around the unpacked row
3658  // offsets and column indices.
3659 
3660  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3661  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3662  typedef pack_functor<
3663  typename local_graph_device_type::entries_type::non_const_type,
3664  typename local_inds_dualv_type::t_dev::const_type,
3665  row_map_type,
3666  typename local_graph_device_type::row_map_type> inds_packer_type;
3667  inds_packer_type f (ind_d,
3668  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3669  ptr_d, this->getRowPtrsUnpackedDevice());
3670  {
        // Launch the packing functor over the row range [0, lclNumRows)
        // in ind_d's execution space.
3671  typedef typename decltype (ind_d)::execution_space exec_space;
3672  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3673  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3674  }
3675 
3676  if (debug_) {
3677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3678  (ptr_d.extent (0) == 0, std::logic_error,
3679  "(\"Optimize Storage\"=true branch) After packing, "
3680  "ptr_d.extent(0)=0.");
3681  if (ptr_d.extent (0) != 0) {
3682  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3683  const auto valToCheck =
3684  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3686  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3687  std::logic_error, "(\"Optimize Storage\"=true branch) "
3688  "After packing, ptr_d(" << (numOffsets-1) << ")="
3689  << valToCheck << " != ind_d.extent(0)="
3690  << ind_d.extent(0) << ".");
3691  }
3692  }
3693  // Build the local graph.  With optimized storage requested, install
3694  // ptr_d via setRowPtrs; otherwise install it only as the packed
3695  // offsets via setRowPtrsPacked.
3694  if (requestOptimizedStorage)
3695  setRowPtrs(ptr_d_const);
3696  else
3697  setRowPtrsPacked(ptr_d_const);
3698  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3699  }
3700  else { // We don't have to pack, so just set the pointers.
3701  //Set both packed and unpacked rowptrs to this
3702  this->setRowPtrs(rowPtrsUnpacked_dev_);
3703  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3704 
3705  if (debug_) {
3706  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3707  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3708  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3709  (rowPtrsPacked_dev.extent (0) == 0, std::logic_error,
3710  "(\"Optimize Storage\"=false branch) "
3711  "rowPtrsPacked_dev_.extent(0) = 0.");
3712  if (rowPtrsPacked_dev.extent (0) != 0) {
3713  const size_t numOffsets =
3714  static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3715  const size_t valToCheck =
3716  rowPtrsPacked_host(numOffsets - 1);
3717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3718  (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3719  std::logic_error, "(\"Optimize Storage\"=false branch) "
3720  "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3721  << valToCheck
3722  << " != lclIndsPacked_wdv.extent(0)="
3723  << lclIndsPacked_wdv.extent (0) << ".");
3724  }
3725  }
3726  }
3727 
      // Final consistency check (debug_ only), common to both branches:
      // the packed offsets have lclNumRows+1 entries and the last one
      // equals the number of packed column indices.
3728  if (debug_) {
3729  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3730  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3731  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3732  (static_cast<size_t> (rowPtrsPacked_dev.extent (0)) != lclNumRows + 1,
3733  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3734  rowPtrsPacked_dev.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3735  << ".");
3736  if (rowPtrsPacked_dev.extent (0) != 0) {
3737  const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3738  const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3739  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3740  (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3741  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3742  << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3743  << lclIndsPacked_wdv.extent (0) << ".");
3744  }
3745  }
3746 
3747  if (requestOptimizedStorage) {
3748  // With optimized storage, we don't need to store
3749  // the array of row entry counts.
3750 
3751  // Free graph data structures that are only needed for
3752  // unpacked 1-D storage.
3753  k_numRowEntries_ = num_row_entries_type ();
3754 
3755  // Keep the new 1-D packed allocations.
3756  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3757 
3758  storageStatus_ = Details::STORAGE_1D_PACKED;
3759  }
3760 
3761  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3762  }
3763 
// replaceColMap: assign newColMap to colMap_ without touching any
// stored column indices.
//
// \param newColMap  The column Map to install; it is stored as given
//   (no null check is made here).
//
// Reports std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC)
// if the graph is already locally or globally indexed.
3764  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3765  void
3767  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3768  {
3769  // NOTE: This safety check matches the code, but not the documentation of CrsGraph
3770  //
3771  // FIXME (mfh 18 Aug 2014) This will break if the calling process
3772  // has no entries, because in that case, currently it is neither
3773  // locally nor globally indexed. This will change once we get rid
3774  // of lazy allocation (so that the constructor allocates indices
3775  // and therefore commits to local vs. global).
      //
      // NOTE(review): the message text below does not describe the
      // condition actually tested (locally or globally indexed) --
      // confirm the intended wording.
3776  const char tfecfFuncName[] = "replaceColMap: ";
3777  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3778  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3779  "Requires matching maps and non-static graph.");
3780  colMap_ = newColMap;
3781  }
3782 
// reindexColumns: re-express the graph's column indices with respect to
// a new column Map, then install that Map and a matching Import.
//
// \param newColMap  The new column Map.  NOTE(review): it is
//   dereferenced below without a null check -- confirm callers never
//   pass null.
// \param newImport  If nonnull, stored as importer_ without validation.
//   If null and domainMap_ is set, a new Import (domainMap_ ->
//   newColMap) is created unless the two Maps are the same.
// \param sortIndicesInEachRow  If true and the graph is locally
//   indexed, sortAndMergeAllIndices is called after the new indices are
//   committed.
//
// Reports std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC)
// if the graph is fill complete, or if the all-reduce below shows that
// on some process the current indices could not be converted or are not
// in the new column Map.
3783  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3784  void
3786  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3787  const Teuchos::RCP<const import_type>& newImport,
3788  const bool sortIndicesInEachRow)
3789  {
3790  using Teuchos::REDUCE_MIN;
3791  using Teuchos::reduceAll;
3792  using Teuchos::RCP;
3793  typedef GlobalOrdinal GO;
3794  typedef LocalOrdinal LO;
3795  typedef typename local_inds_dualv_type::t_host col_inds_type;
3796  const char tfecfFuncName[] = "reindexColumns: ";
3797 
3798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3799  isFillComplete (), std::runtime_error, "The graph is fill complete "
3800  "(isFillComplete() returns true). You must call resumeFill() before "
3801  "you may call this method.");
3802 
3803  // mfh 19 Aug 2014: This method does NOT redistribute data; it
3804  // doesn't claim to do the work of an Import or Export. This
3805  // means that for all processes, the calling process MUST own all
3806  // column indices, in both the old column Map (if it exists) and
3807  // the new column Map. We check this via an all-reduce.
3808  //
3809  // Some processes may be globally indexed, others may be locally
3810  // indexed, and others (that have no graph entries) may be
3811  // neither. This method will NOT change the graph's current
3812  // state. If it's locally indexed, it will stay that way, and
3813  // vice versa. It would be easy to add an option to convert indices
3814  // from global to local, so as to save a global-to-local
3815  // conversion pass. However, we don't do this here. The intended
3816  // typical use case is that the graph already has a column Map and
3817  // is locally indexed, and this is the case for which we optimize.
3818 
3819  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3820 
3821  // Attempt to convert indices to the new column Map's version of
3822  // local. This will fail if on the calling process, the graph has
3823  // indices that are not on that process in the new column Map.
3824  // After the local conversion attempt, we will do an all-reduce to
3825  // see if any processes failed.
3826 
3827  // If this is false, then either the graph contains a column index
3828  // which is invalid in the CURRENT column Map, or the graph is
3829  // locally indexed but currently has no column Map. In either
3830  // case, there is no way to convert the current local indices into
3831  // global indices, so that we can convert them into the new column
3832  // Map's local indices. It's possible for this to be true on some
3833  // processes but not others, due to replaceColMap.
3834  bool allCurColIndsValid = true;
3835  // On the calling process, are all valid current column indices
3836  // also in the new column Map on the calling process? In other
3837  // words, does local reindexing suffice, or should the user have
3838  // done an Import or Export instead?
3839  bool localSuffices = true;
3840 
3841  // Final array for the new local indices (1-D storage). It is
3842  // allocated ONLY if indices are allocated, the graph is locally
3843  // indexed on the calling process, and the graph currently has a
3844  // column Map. In that case its length is the value returned by
3845  // getLocalAllocationSize(). Otherwise it stays default
3846  // constructed.
3847  col_inds_type newLclInds1D;
3848  auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3849 
3850  // If indices aren't allocated, that means the calling process
3851  // owns no entries in the graph. Thus, there is nothing to
3852  // convert, and it trivially succeeds locally.
3853  if (indicesAreAllocated ()) {
3854  if (isLocallyIndexed ()) {
3855  if (hasColMap ()) { // locally indexed, and currently has a column Map
3856  const map_type& oldColMap = * (getColMap ());
3857  // Allocate storage for the new local indices.
3858  const size_t allocSize = this->getLocalAllocationSize ();
3859  newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3860  allocSize);
3861  // Attempt to convert the new indices locally.
3862  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3863  const RowInfo rowInfo = this->getRowInfo (lclRow);
3864  const size_t beg = rowInfo.offset1D;
3865  const size_t end = beg + rowInfo.numEntries;
3866  for (size_t k = beg; k < end; ++k) {
3867  const LO oldLclCol = oldLclInds1D(k);
3868  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3869  allCurColIndsValid = false;
3870  break; // Stop scanning this row; the loop over rows continues
3871  }
3872  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3873 
3874  // The above conversion MUST succeed. Otherwise, the
3875  // current local index is invalid, which means that
3876  // the graph was constructed incorrectly.
3877  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3878  allCurColIndsValid = false;
3879  break; // Stop scanning this row; the loop over rows continues
3880  }
3881  else {
3882  const LO newLclCol = newColMap->getLocalElement (gblCol);
3883  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3884  localSuffices = false;
3885  break; // Stop scanning this row; the loop over rows continues
3886  }
3887  newLclInds1D(k) = newLclCol;
3888  }
3889  } // for each entry in the current row
3890  } // for each locally owned row
3891  }
3892  else { // locally indexed, but no column Map
3893  // This case is only possible if replaceColMap() was called
3894  // with a null argument on the calling process. It's
3895  // possible, but it means that this method can't possibly
3896  // succeed, since we have no way of knowing how to convert
3897  // the current local indices to global indices.
3898  allCurColIndsValid = false;
3899  }
3900  }
3901  else { // globally indexed
3902  // If the graph is globally indexed, we don't need to save
3903  // local indices, but we _do_ need to know whether the current
3904  // global indices are valid in the new column Map. We may
3905  // need to do a getRemoteIndexList call to find this out.
3906  //
3907  // In this case, it doesn't matter whether the graph currently
3908  // has a column Map. We don't need the old column Map to
3909  // convert from global indices to the _new_ column Map's local
3910  // indices. Furthermore, we can use the same code, whether
3911  // the graph is static or dynamic profile.
3912 
3913  // Test whether the current global indices are in the new
3914  // column Map on the calling process.
3915  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3916  const RowInfo rowInfo = this->getRowInfo (lclRow);
3917  auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3918  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3919  const GO gblCol = oldGblRowView(k);
3920  if (! newColMap->isNodeGlobalElement (gblCol)) {
3921  localSuffices = false;
3922  break; // Stop scanning this row; the loop over rows continues
3923  }
3924  } // for each entry in the current row
3925  } // for each locally owned row
3926  } // locally or globally indexed
3927  } // whether indices are allocated
3928 
3929  // Do an all-reduce to check both possible error conditions.
      // The reduction is a MIN, so a single failing process makes the
      // global flag 0.  gblSuccess starts at {0, 0}; if the row Map (and
      // hence the communicator) is null, no reduction happens and the
      // first check below reports an error.
3930  int lclSuccess[2];
3931  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3932  lclSuccess[1] = localSuffices ? 1 : 0;
3933  int gblSuccess[2];
3934  gblSuccess[0] = 0;
3935  gblSuccess[1] = 0;
3936  RCP<const Teuchos::Comm<int> > comm =
3937  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3938  if (! comm.is_null ()) {
3939  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3940  }
3941 
3942  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3943  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3944  " The most likely reason is that the graph is locally indexed, but the "
3945  "column Map is missing (null) on some processes, due to a previous call "
3946  "to replaceColMap().");
3947 
3948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3949  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3950  "contains column indices that are in the old column Map, but not in the "
3951  "new column Map (on that process). This method does NOT redistribute "
3952  "data; it does not claim to do the work of an Import or Export operation."
3953  " This means that for all processess, the calling process MUST own all "
3954  "column indices, in both the old column Map and the new column Map. In "
3955  "this case, you will need to do an Import or Export operation to "
3956  "redistribute data.");
3957 
3958  // Commit the results.
3959  if (isLocallyIndexed ()) {
3960  { // scope the device view; sortAndMergeAllIndices needs host
3961  typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3962  Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3963  Kokkos::WithoutInitializing),
3964  newLclInds1D.extent(0));
3965  Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3966  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3967  }
3968 
3969  // We've reindexed, so we don't know if the indices are sorted.
3970  //
3971  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3972  // since we're already going through all the indices above. We
3973  // could also sort each row in place; that way, we would only
3974  // have to make one pass over the rows.
3975  indicesAreSorted_ = false;
3976  if (sortIndicesInEachRow) {
3977  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3978  // order to call this method.
3979  //
3980  // FIXME (mfh 17 Sep 2014) This violates the strong exception
3981  // guarantee. It would be better to sort the new index arrays
3982  // before committing them.
3983  const bool sorted = false; // need to resort
3984  const bool merged = true; // no need to merge, since no dups
3985  this->sortAndMergeAllIndices (sorted, merged);
3986  }
3987  }
3988  colMap_ = newColMap;
3989 
3990  if (newImport.is_null ()) {
3991  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3992  // check whether the input Import is null on any process.
3993  //
3994  // If the domain Map hasn't been set yet, we can't compute a new
3995  // Import object. Leave it what it is; it should be null, but
3996  // it doesn't matter. If the domain Map _has_ been set, then
3997  // compute a new Import object if necessary.
3998  if (! domainMap_.is_null ()) {
3999  if (! domainMap_->isSameAs (* newColMap)) {
4000  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4001  } else {
4002  importer_ = Teuchos::null; // don't need an Import
4003  }
4004  }
4005  } else {
4006  // The caller gave us an Import object. Assume that it's valid.
4007  importer_ = newImport;
4008  }
4009  }
4010 
// replaceDomainMap: install a new domain Map, creating the Import that
// goes with it when one is needed.
//
// \param newDomainMap  The new domain Map; must be nonnull.
//
// Throws std::invalid_argument if the graph has no column Map
// (colMap_ is null) or if newDomainMap is null.  The actual assignment
// is delegated to replaceDomainMapAndImporter.
4011  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4012  void
4014  replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4015  {
4016  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4017  TEUCHOS_TEST_FOR_EXCEPTION(
4018  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4019  "this method unless the graph already has a column Map.");
4020  TEUCHOS_TEST_FOR_EXCEPTION(
4021  newDomainMap.is_null (), std::invalid_argument,
4022  prefix << "The new domain Map must be nonnull.");
4023 
4024  // Create a new importer, if needed.  The importer stays null when the
      // new domain Map is the same RCP as colMap_ or isSameAs() reports
      // that the two Maps match; the pointer comparison comes first so
      // that isSameAs() is skipped in the trivial case.
4025  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4026  if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4027  newImporter = rcp(new import_type(newDomainMap, colMap_));
4028  }
4029  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4030  }
4031 
// replaceDomainMapAndImporter: assign the given domain Map and Import to
// domainMap_ and importer_.
//
// \param newDomainMap  The new domain Map; must be nonnull.
// \param newImporter   The Import to store; may be null.
//
// Throws std::invalid_argument if colMap_ is null or newDomainMap is
// null.  When debug_ is set, also throws std::invalid_argument if the
// Maps and the Import are inconsistent (see the checks below); those
// debug messages are not prefixed with the method name.
4032  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4033  void
4035  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4036  const Teuchos::RCP<const import_type>& newImporter)
4037  {
4038  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4039  TEUCHOS_TEST_FOR_EXCEPTION(
4040  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4041  "this method unless the graph already has a column Map.");
4042  TEUCHOS_TEST_FOR_EXCEPTION(
4043  newDomainMap.is_null (), std::invalid_argument,
4044  prefix << "The new domain Map must be nonnull.");
4045 
4046  if (debug_) {
4047  if (newImporter.is_null ()) {
4048  // It's not a good idea to put expensive operations in a macro
4049  // clause, even if they are side effect - free, because macros
4050  // don't promise that they won't evaluate their arguments more
4051  // than once. It's polite for them to do so, but not required.
4052  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4053  TEUCHOS_TEST_FOR_EXCEPTION
4054  (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4055  "then the new domain Map must be the same as the current column Map.");
4056  }
4057  else {
          // A nonnull Import must go from the new domain Map (source) to
          // the current column Map (target).
4058  const bool colSameAsTgt =
4059  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4060  const bool newDomSameAsSrc =
4061  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4062  TEUCHOS_TEST_FOR_EXCEPTION
4063  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4064  "new Import is nonnull, then the current column Map must be the same "
4065  "as the new Import's target Map, and the new domain Map must be the "
4066  "same as the new Import's source Map.");
4067  }
4068  }
4069 
4070  domainMap_ = newDomainMap;
4071  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4072  }
4073 
// replaceRangeMap: install a new range Map, creating the Export that
// goes with it when one is needed.
//
// \param newRangeMap  The new range Map; must be nonnull.
//
// Throws std::invalid_argument if the graph has no row Map (rowMap_ is
// null) or if newRangeMap is null.  The actual assignment is delegated
// to replaceRangeMapAndExporter.
4074  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4075  void
4077  replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4078  {
4079  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4080  TEUCHOS_TEST_FOR_EXCEPTION(
4081  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4082  "this method unless the graph already has a row Map.");
4083  TEUCHOS_TEST_FOR_EXCEPTION(
4084  newRangeMap.is_null (), std::invalid_argument,
4085  prefix << "The new range Map must be nonnull.");
4086 
4087  // Create a new exporter, if needed.  The exporter stays null when the
      // new range Map is the same RCP as rowMap_ or isSameAs() reports
      // that the two Maps match; otherwise it goes from rowMap_ to the
      // new range Map.
4088  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4089  if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4090  newExporter = rcp(new export_type(rowMap_, newRangeMap));
4091  }
4092  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4093  }
4094 
// replaceRangeMapAndExporter: assign the given range Map and Export to
// rangeMap_ and exporter_.
//
// \param newRangeMap  The new range Map; must be nonnull.
// \param newExporter  The Export to store; may be null.
//
// Throws std::invalid_argument if rowMap_ is null or newRangeMap is
// null.  When debug_ is set, also throws std::invalid_argument if the
// Maps and the Export are inconsistent (see the checks below).
4095  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4096  void
4098  replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4099  const Teuchos::RCP<const export_type>& newExporter)
4100  {
4101  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
      // The precondition tested here is on the row Map, so the message
      // names the row Map (it used to say "column Map").
4102  TEUCHOS_TEST_FOR_EXCEPTION(
4103  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4104  "this method unless the graph already has a row Map.");
      // The argument tested here is the range Map, so the message names
      // the range Map (it used to say "domain Map").
4105  TEUCHOS_TEST_FOR_EXCEPTION(
4106  newRangeMap.is_null (), std::invalid_argument,
4107  prefix << "The new range Map must be nonnull.");
4108 
4109  if (debug_) {
4110  if (newExporter.is_null ()) {
4111  // It's not a good idea to put expensive operations in a macro
4112  // clause, even if they are side effect - free, because macros
4113  // don't promise that they won't evaluate their arguments more
4114  // than once. It's polite for them to do so, but not required.
4115  const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4116  TEUCHOS_TEST_FOR_EXCEPTION
4117  (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4118  "then the new range Map must be the same as the current row Map.");
4119  }
4120  else {
          // A nonnull Export must go from the current row Map (source) to
          // the new range Map (target).
4121  const bool newRangeSameAsTgt =
4122  newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4123  const bool rowSameAsSrc =
4124  rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4125  TEUCHOS_TEST_FOR_EXCEPTION
4126  (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4127  "new Export is nonnull, then the current row Map must be the same "
4128  "as the new Export's source Map, and the new range Map must be the "
4129  "same as the new Export's target Map.");
4130  }
4131  }
4132 
4133  rangeMap_ = newRangeMap;
4134  exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4135  }
4136 
4137 
// Returns a local_graph_device_type built from the device view of the
// packed local column indices (requested with Access::ReadWrite) and
// the packed device row offsets.
// NOTE(review): the return-type and name lines of this definition are
// missing from this listing; presumably this is getLocalGraphDevice, by
// symmetry with getLocalGraphHost -- confirm against the header.
4138  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4142  {
4143  return local_graph_device_type(
4144  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4145  this->getRowPtrsPackedDevice());
4146  }
4147 
// getLocalGraphHost: returns a local_graph_host_type built from the host
// view of the packed local column indices (requested with
// Access::ReadWrite) and the packed host row offsets.
4148  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4151  getLocalGraphHost () const
4152  {
4153  return local_graph_host_type(
4154  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4155  this->getRowPtrsPackedHost());
4156  }
4157 
// computeGlobalConstants (name taken from the profiling region below;
// the declarator lines are missing from this listing): computes
// globalNumEntries_ and globalMaxNumRowEntries_ and sets
// haveGlobalConstants_.
//
// Always calls computeLocalConstants() first.  The two all-reduces over
// the graph's communicator run only if haveGlobalConstants_ is not
// already set.
4158  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4159  void
4162  {
4163  using ::Tpetra::Details::ProfilingRegion;
4164  using Teuchos::ArrayView;
4165  using Teuchos::outArg;
4166  using Teuchos::reduceAll;
4167  typedef global_size_t GST;
4168 
4169  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4170 
4171  this->computeLocalConstants ();
4172 
4173  // Compute global constants from local constants. Processes that
4174  // already have local constants still participate in the
4175  // all-reduces, using their previously computed values.
4176  if (! this->haveGlobalConstants_) {
4177  const Teuchos::Comm<int>& comm = * (this->getComm ());
4178  // Promote all the nodeNum* and nodeMaxNum* quantities from
4179  // size_t to global_size_t, when doing the all-reduces for
4180  // globalNum* / globalMaxNum* results.
4181  //
4182  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4183  // this in two all-reduces (one for the sum and the other for
4184  // the max), or use a custom MPI_Op that combines the sum and
4185  // the max. The latter might even be slower than two
4186  // all-reduces on modern network hardware. It would also be a
4187  // good idea to use nonblocking all-reduces (MPI 3), so that we
4188  // don't have to wait around for the first one to finish before
4189  // starting the second one.
4190  GST lcl, gbl;
4191  lcl = static_cast<GST> (this->getLocalNumEntries ());
4192 
        // Sum of the local entry counts over all processes.
4193  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4194  this->globalNumEntries_ = gbl;
4195 
        // Maximum over all processes of the local max entries per row.
4196  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4197  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4198  outArg (this->globalMaxNumRowEntries_));
4199  this->haveGlobalConstants_ = true;
4200  }
4201  }
4202 
4203 
// computeLocalConstants (name taken from the profiling region below; the
// declarator lines are missing from this listing): computes
// nodeMaxNumRowEntries_ from the packed device row offsets and sets
// haveLocalConstants_.  Returns immediately if haveLocalConstants_ is
// already set.
4204  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4205  void
4208  {
4209  using ::Tpetra::Details::ProfilingRegion;
4210 
4211  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4212  if (this->haveLocalConstants_) {
4213  return;
4214  }
4215 
4216  // Reset local properties
4217  this->nodeMaxNumRowEntries_ =
4218  Teuchos::OrdinalTraits<size_t>::invalid();
4219 
4220  using LO = local_ordinal_type;
4221 
      // The row count is derived from the offsets array itself: its
      // length minus one, or zero if the array is empty.
4222  auto ptr = this->getRowPtrsPackedDevice();
4223  const LO lclNumRows = ptr.extent(0) == 0 ?
4224  static_cast<LO> (0) :
4225  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4226 
      // Presumably maxDifference returns the largest difference between
      // consecutive offsets, i.e. the maximum number of entries in any
      // local row -- confirm against its definition.
4227  const LO lclMaxNumRowEnt =
4228  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4229  ptr, lclNumRows);
4230  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4231  this->haveLocalConstants_ = true;
4232  }
4233 
4234 
4235  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4236  std::pair<size_t, std::string>
4238  makeIndicesLocal (const bool verbose)
4239  {
4241  using Teuchos::arcp;
4242  using Teuchos::Array;
4243  using std::endl;
4244  typedef LocalOrdinal LO;
4245  typedef GlobalOrdinal GO;
4246  typedef device_type DT;
4247  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4248  typedef typename num_row_entries_type::non_const_value_type num_ent_type;
4249  const char tfecfFuncName[] = "makeIndicesLocal: ";
4250  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4251 
4252  std::unique_ptr<std::string> prefix;
4253  if (verbose) {
4254  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4255  std::ostringstream os;
4256  os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4257  std::cerr << os.str();
4258  }
4259 
4260  // These are somewhat global properties, so it's safe to have
4261  // exception checks for them, rather than returning an error code.
4262  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4263  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4264  "column Map yet. This method should never be called in that case. "
4265  "Please report this bug to the Tpetra developers.");
4266  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4267  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4268  "that it has a column Map, because hasColMap() returns true. However, "
4269  "the result of getColMap() is null. This should never happen. Please "
4270  "report this bug to the Tpetra developers.");
4271 
4272  // Return value 1: The number of column indices (counting
4273  // duplicates) that could not be converted to local indices,
4274  // because they were not in the column Map on the calling process.
4275  size_t lclNumErrs = 0;
4276  std::ostringstream errStrm; // for return value 2 (error string)
4277 
4278  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4279  const map_type& colMap = * (this->getColMap ());
4280 
4281  if (this->isGloballyIndexed () && lclNumRows != 0) {
4282  // This is a host-accessible View.
4283  typename num_row_entries_type::const_type h_numRowEnt =
4284  this->k_numRowEntries_;
4285 
4286  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
4287 
4288  // Allocate space for local indices.
4289  if (rowPtrsUnpacked_host.extent(0) == 0) {
4290  errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4291  "happen here. Please report this bug to the Tpetra developers."
4292  << endl;
4293  // Need to return early.
4294  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4295  errStrm.str ());
4296  }
4297  const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4298 
4299  // mfh 17 Dec 2016: We don't need initial zero-fill of
4300  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4301  // AllowPadding would only help for aligned access (e.g.,
4302  // for vectorization) if we also were to pad each row to the
4303  // same alignment, so we'll skip AllowPadding for now.
4304 
4305  // using Kokkos::AllowPadding;
4306  using Kokkos::view_alloc;
4307  using Kokkos::WithoutInitializing;
4308 
4309  // When giving the label as an argument to
4310  // Kokkos::view_alloc, the label must be a string and not a
4311  // char*, else the code won't compile. This is because
4312  // view_alloc also allows a raw pointer as its first
4313  // argument. See
4314  // https://github.com/kokkos/kokkos/issues/434. This is a
4315  // large allocation typically, so the overhead of creating
4316  // an std::string is minor.
4317  const std::string label ("Tpetra::CrsGraph::lclInd");
4318  if (verbose) {
4319  std::ostringstream os;
4320  os << *prefix << "(Re)allocate lclInd_wdv: old="
4321  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4322  std::cerr << os.str();
4323  }
4324 
4325  local_inds_dualv_type lclInds_dualv =
4326  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4327  numEnt);
4328  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4329 
4330  auto lclColMap = colMap.getLocalMap ();
4331  // This is a "device mirror" of the host View h_numRowEnt.
4332  //
4333  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4334  // Device instance is to use its default constructor. See the
4335  // following Kokkos issue:
4336  //
4337  // https://github.com/kokkos/kokkos/issues/442
4338  if (verbose) {
4339  std::ostringstream os;
4340  os << *prefix << "Allocate device mirror k_numRowEnt: "
4341  << h_numRowEnt.extent(0) << endl;
4342  std::cerr << os.str();
4343  }
4344  auto k_numRowEnt =
4345  Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4346 
4348  lclNumErrs =
4349  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4350  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4351  gblInds_wdv.getDeviceView(Access::ReadOnly),
4352  this->getRowPtrsUnpackedDevice(),
4353  lclColMap,
4354  k_numRowEnt);
4355  if (lclNumErrs != 0) {
4356  const int myRank = [this] () {
4357  auto map = this->getMap ();
4358  if (map.is_null ()) {
4359  return 0;
4360  }
4361  else {
4362  auto comm = map->getComm ();
4363  return comm.is_null () ? 0 : comm->getRank ();
4364  }
4365  } ();
4366  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4367  errStrm << "(Process " << myRank << ") When converting column "
4368  "indices from global to local, we encountered " << lclNumErrs
4369  << " ind" << (pluralNumErrs ? "ices" : "ex")
4370  << " that do" << (pluralNumErrs ? "es" : "")
4371  << " not live in the column Map on this process." << endl;
4372  }
4373 
4374  // We've converted column indices from global to local, so we
4375  // can deallocate the global column indices (which we know are
4376  // in 1-D storage, because the graph has static profile).
4377  if (verbose) {
4378  std::ostringstream os;
4379  os << *prefix << "Free gblInds_wdv: "
4380  << gblInds_wdv.extent(0) << endl;
4381  std::cerr << os.str();
4382  }
4383  gblInds_wdv = global_inds_wdv_type ();
4384  } // globallyIndexed() && lclNumRows > 0
4385 
4386  this->indicesAreLocal_ = true;
4387  this->indicesAreGlobal_ = false;
4388  this->checkInternalState ();
4389 
4390  return std::make_pair (lclNumErrs, errStrm.str ());
4391  }
4392 
// Build (or rebuild) the graph's column Map via Details::makeColMap and
// store it in colMap_.
//
// \param remotePIDs [out, presumably] Passed through to
//   Details::makeColMap; by its name, the owning process ranks of remote
//   column indices -- TODO confirm against Details::makeColMap.
//
// In debug mode (debug_), the per-process error code is all-reduced and,
// if any process failed, every process throws std::runtime_error with
// all processes' gathered messages.  Outside debug mode the error code
// is ignored.
4393  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4394  void
4396  makeColMap (Teuchos::Array<int>& remotePIDs)
4397  {
4399  using std::endl;
4400  const char tfecfFuncName[] = "makeColMap";
4401 
4402  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4403  std::unique_ptr<std::string> prefix;
4404  if (verbose_) {
4405  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4406  std::ostringstream os;
4407  os << *prefix << "Start" << endl;
4408  std::cerr << os.str();
4409  }
4410 
4411  // this->colMap_ should be null at this point, but we accept the
4412  // future possibility that it might not be (esp. if we decide
4413  // later to support graph structure changes after first
4414  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4415  // 2017) support).
4416  Teuchos::RCP<const map_type> colMap = this->colMap_;
4417  const bool sortEachProcsGids =
4418  this->sortGhostsAssociatedWithEachProcessor_;
4419 
4420  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4421  // per-process error code. If an error does occur on a process,
4422  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4423  // notice that error. This is the caller's responsibility. For
4424  // now, we only propagate (to all processes) and report the error
4425  // in debug mode. In the future, we need to add the local/global
4426  // error handling scheme used in BlockCrsMatrix to this class.
4427  if (debug_) {
4428  using Teuchos::outArg;
4429  using Teuchos::REDUCE_MIN;
4430  using Teuchos::reduceAll;
4431 
4432  std::ostringstream errStrm;
4433  const int lclErrCode =
4434  Details::makeColMap (colMap, remotePIDs,
4435  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4436  auto comm = this->getComm ();
4437  if (! comm.is_null ()) {
// MIN over {0,1} success flags: the result is 1 only if every process
// succeeded.
4438  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4439  int gblSuccess = 0; // output argument
4440  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4441  outArg (gblSuccess));
4442  if (gblSuccess != 1) {
4443  std::ostringstream os;
4444  Details::gathervPrint (os, errStrm.str (), *comm);
// tfecfFuncName has no trailing separator, so the message itself
// starts with ": ".
4445  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4446  (true, std::runtime_error, ": An error happened on at "
4447  "least one process in the CrsGraph's communicator. "
4448  "Here are all processes' error messages:" << std::endl
4449  << os.str ());
4450  }
4451  }
4452  }
4453  else {
// Non-debug path: the error code is deliberately discarded and no
// error stream is supplied.
4454  (void) Details::makeColMap (colMap, remotePIDs,
4455  getDomainMap (), *this, sortEachProcsGids, nullptr);
4456  }
4457  // See above. We want to admit the possibility of makeColMap
4458  // actually revising an existing column Map, even though that
4459  // doesn't currently (as of 10 May 2017) happen.
4460  this->colMap_ = colMap;
4461 
4462  checkInternalState ();
4463  if (verbose_) {
4464  std::ostringstream os;
4465  os << *prefix << "Done" << endl;
4466  std::cerr << os.str();
4467  }
4468  }
4469 
4470 
4471  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4472  void
4474  sortAndMergeAllIndices (const bool sorted, const bool merged)
4475  {
4476  using std::endl;
4477  using LO = LocalOrdinal;
4478  using host_execution_space =
4479  typename Kokkos::View<LO*, device_type>::HostMirror::
4480  execution_space;
4481  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4482  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4483  Details::ProfilingRegion regionSortAndMerge
4484  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4485 
4486  std::unique_ptr<std::string> prefix;
4487  if (verbose_) {
4488  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4489  std::ostringstream os;
4490  os << *prefix << "Start: "
4491  << "sorted=" << (sorted ? "true" : "false")
4492  << ", merged=" << (merged ? "true" : "false") << endl;
4493  std::cerr << os.str();
4494  }
4495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4496  (this->isGloballyIndexed(), std::logic_error,
4497  "This method may only be called after makeIndicesLocal." );
4498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4499  (! merged && this->isStorageOptimized(), std::logic_error,
4500  "The graph is already storage optimized, so we shouldn't be "
4501  "merging any indices. "
4502  "Please report this bug to the Tpetra developers.");
4503 
4504  if (! sorted || ! merged) {
4505  const LO lclNumRows(this->getLocalNumRows());
4506  auto range = range_type(0, lclNumRows);
4507 
4508  if (verbose_) {
4509  size_t totalNumDups = 0;
4510  //Sync and mark-modified the local indices before disabling WDV tracking
4511  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4513  Kokkos::parallel_reduce(range,
4514  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4515  {
4516  const RowInfo rowInfo = this->getRowInfo(lclRow);
4517  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4518  },
4519  totalNumDups);
4521  std::ostringstream os;
4522  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4523  std::cerr << os.str();
4524  }
4525  else {
4526  //Sync and mark-modified the local indices before disabling WDV tracking
4527  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4529  Kokkos::parallel_for(range,
4530  [this, sorted, merged] (const LO lclRow)
4531  {
4532  const RowInfo rowInfo = this->getRowInfo(lclRow);
4533  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4534  });
4536  }
4537  this->indicesAreSorted_ = true; // we just sorted every row
4538  this->noRedundancies_ = true; // we just merged every row
4539  }
4540 
4541  if (verbose_) {
4542  std::ostringstream os;
4543  os << *prefix << "Done" << endl;
4544  std::cerr << os.str();
4545  }
4546  }
4547 
// Create the graph's Import (domain Map -> column Map) and Export
// (row Map -> range Map) objects, if they do not already exist and the
// respective pairs of Maps differ.
//
// \param remotePIDs [in] Forwarded to the Import constructor when
//   useRemotePIDs is true.
// \param useRemotePIDs [in] Whether to use the Import constructor
//   overload that takes remotePIDs.
//
// Throws std::logic_error if the graph has no column Map.  If the
// graph's parameter list has an "Import" or "Export" sublist, that
// sublist is passed to the corresponding constructor.
4548  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4549  void
4551  makeImportExport (Teuchos::Array<int>& remotePIDs,
4552  const bool useRemotePIDs)
4553  {
4554  using ::Tpetra::Details::ProfilingRegion;
4555  using Teuchos::ParameterList;
4556  using Teuchos::RCP;
4557  using Teuchos::rcp;
4558  const char tfecfFuncName[] = "makeImportExport: ";
4559  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4560 
4561  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4562  (! this->hasColMap (), std::logic_error,
4563  "This method may not be called unless the graph has a column Map.");
4564  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4565 
4566  // Don't do any checks to see if we need to create the Import, if
4567  // it exists already.
4568  //
4569  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4570  // change CrsGraph in the future to allow changing the column
4571  // Map after fillComplete. For now, the column Map is fixed
4572  // after the first fillComplete call.
4573  if (importer_.is_null ()) {
4574  // Create the Import instance if necessary.
// Pointer comparison first: it short-circuits the isSameAs call when
// both RCPs refer to the same Map object.
4575  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4576  if (params.is_null () || ! params->isSublist ("Import")) {
4577  if (useRemotePIDs) {
4578  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4579  }
4580  else {
4581  importer_ = rcp (new import_type (domainMap_, colMap_));
4582  }
4583  }
4584  else {
4585  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4586  if (useRemotePIDs) {
4587  RCP<import_type> newImp =
4588  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4589  importSublist));
4590  importer_ = newImp;
4591  }
4592  else {
4593  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4594  }
4595  }
4596  }
4597  }
4598 
4599  // Don't do any checks to see if we need to create the Export, if
4600  // it exists already.
4601  if (exporter_.is_null ()) {
4602  // Create the Export instance if necessary.
4603  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4604  if (params.is_null () || ! params->isSublist ("Export")) {
4605  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4606  }
4607  else {
4608  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4609  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4610  }
4611  }
4612  }
4613  }
4614 
4615 
// Return a one-line description of this graph: the base class's
// description followed by a brace-delimited status summary.  A
// fill-complete graph reports global row, column, and entry counts; a
// graph that is not fill complete reports only the global row count.
// NOTE(review): the declarator lines are not visible in this listing;
// by the body this is presumably CrsGraph::description() const --
// confirm against the header.
4616  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4617  std::string
4620  {
4621  std::ostringstream oss;
4622  oss << dist_object_type::description ();
4623  if (isFillComplete ()) {
4624  oss << "{status = fill complete"
4625  << ", global rows = " << getGlobalNumRows()
4626  << ", global cols = " << getGlobalNumCols()
4627  << ", global num entries = " << getGlobalNumEntries()
4628  << "}";
4629  }
4630  else {
4631  oss << "{status = fill not complete"
4632  << ", global rows = " << getGlobalNumRows()
4633  << "}";
4634  }
4635  return oss.str();
4636  }
4637 
4638 
// Print a description of this graph to the given stream, with the
// amount of detail controlled by verbLevel.
//
// \param out [in/out] Output stream.
// \param verbLevel [in] Verbosity level; VERB_DEFAULT is treated as
//   VERB_LOW.
//
// This calls comm->barrier() whenever the level is MEDIUM or higher, so
// in that case every process in the graph's communicator must call it.
// Per-process output is emitted in rank order by looping over ranks.
4639  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4640  void
4642  describe (Teuchos::FancyOStream &out,
4643  const Teuchos::EVerbosityLevel verbLevel) const
4644  {
4645  using Teuchos::ArrayView;
4646  using Teuchos::Comm;
4647  using Teuchos::RCP;
4648  using Teuchos::VERB_DEFAULT;
4649  using Teuchos::VERB_NONE;
4650  using Teuchos::VERB_LOW;
4651  using Teuchos::VERB_MEDIUM;
4652  using Teuchos::VERB_HIGH;
4653  using Teuchos::VERB_EXTREME;
4654  using std::endl;
4655  using std::setw;
4656 
4657  Teuchos::EVerbosityLevel vl = verbLevel;
4658  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4659  RCP<const Comm<int> > comm = this->getComm();
4660  const int myImageID = comm->getRank(),
4661  numImages = comm->getSize();
// Column width for the tabular output: the number of decimal digits
// needed for the global row count, but at least 11 (the length of the
// longest column header, "Num Entries"), plus 2 for spacing.
4662  size_t width = 1;
4663  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4664  ++width;
4665  }
4666  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4667  Teuchos::OSTab tab (out);
4668  // none: print nothing
4669  // low: print O(1) info from node 0
4670  // medium: print O(P) info, num entries per node
4671  // high: print O(N) info, num entries per row
4672  // extreme: print O(NNZ) info: print graph indices
4673  //
4674  // for medium and higher, print constituent objects at specified verbLevel
4675  if (vl != VERB_NONE) {
4676  if (myImageID == 0) out << this->description() << std::endl;
4677  // O(1) globals, minus what was already printed by description()
4678  if (isFillComplete() && myImageID == 0) {
4679  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4680  }
4681  // constituent objects
4682  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4683  if (myImageID == 0) out << "\nRow map: " << std::endl;
4684  rowMap_->describe(out,vl);
4685  if (colMap_ != Teuchos::null) {
4686  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4687  colMap_->describe(out,vl);
4688  }
4689  if (domainMap_ != Teuchos::null) {
4690  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4691  domainMap_->describe(out,vl);
4692  }
4693  if (rangeMap_ != Teuchos::null) {
4694  if (myImageID == 0) out << "\nRange map: " << std::endl;
4695  rangeMap_->describe(out,vl);
4696  }
4697  }
4698  // O(P) data
4699  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4700  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4701  if (myImageID == imageCtr) {
4702  out << "Node ID = " << imageCtr << std::endl
4703  << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4704  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4705  if (! indicesAreAllocated ()) {
4706  out << "Indices are not allocated." << std::endl;
4707  }
4708  }
// NOTE(review): the repeated barriers are presumably a best-effort
// attempt to let each rank's output drain before the next rank prints;
// barriers alone do not guarantee output ordering -- confirm intent.
4709  comm->barrier();
4710  comm->barrier();
4711  comm->barrier();
4712  }
4713  }
4714  // O(N) and O(NNZ) data
4715  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4716  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4717  if (myImageID == imageCtr) {
4718  out << std::setw(width) << "Node ID"
4719  << std::setw(width) << "Global Row"
4720  << std::setw(width) << "Num Entries";
4721  if (vl == VERB_EXTREME) {
4722  out << " Entries";
4723  }
4724  out << std::endl;
4725  const LocalOrdinal lclNumRows =
4726  static_cast<LocalOrdinal> (this->getLocalNumRows ());
4727  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4728  const RowInfo rowinfo = this->getRowInfo (r);
4729  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4730  out << std::setw(width) << myImageID
4731  << std::setw(width) << gid
4732  << std::setw(width) << rowinfo.numEntries;
4733  if (vl == VERB_EXTREME) {
4734  out << " ";
// Column indices are always printed as global indices; locally indexed
// rows are translated through the column Map.
4735  if (isGloballyIndexed()) {
4736  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4737  for (size_t j=0; j < rowinfo.numEntries; ++j){
4738  GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4739  out << colgid << " ";
4740  }
4741  }
4742  else if (isLocallyIndexed()) {
4743  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4744  for (size_t j=0; j < rowinfo.numEntries; ++j) {
4745  LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4746  out << colMap_->getGlobalElement(collid) << " ";
4747  }
4748  }
4749  }
4750  out << std::endl;
4751  }
4752  }
4753  comm->barrier();
4754  comm->barrier();
4755  comm->barrier();
4756  }
4757  }
4758  }
4759  }
4760 
4761 
// Compatibility check between this graph and a source object.  This
// implementation performs no checks: the source argument is unused and
// the function always returns true.
4762  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4763  bool
4765  checkSizes (const SrcDistObject& /* source */)
4766  {
4767  // It's not clear what kind of compatibility checks on sizes can
4768  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4769  // compatibility.
4770  return true;
4771  }
4772 
// Copy rows from the source graph into this graph: first the leading
// numSameIDs rows (same local index in source and target), then the
// permuted rows given by (permuteFromLIDs[i] -> permuteToLIDs[i]).
// All insertions go through insertGlobalIndices.
//
// \param source [in] Source object; must be a row_graph_type.
// \param numSameIDs [in] Number of leading rows that are copied at the
//   same local index.
// \param permuteToLIDs [in] Target local row indices of permuted rows.
// \param permuteFromLIDs [in] Source local row indices of permuted rows.
//
// Throws std::runtime_error if the two permute arrays differ in length.
// Padding is computed and applied up front (computeCrsPadding /
// applyCrsPadding) before any indices are inserted.
// NOTE(review): the declarator lines are not visible in this listing;
// the name is taken from tfecfFuncName below.
4773  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4774  void
4777  (const SrcDistObject& source,
4778  const size_t numSameIDs,
4779  const Kokkos::DualView<const local_ordinal_type*,
4780  buffer_device_type>& permuteToLIDs,
4781  const Kokkos::DualView<const local_ordinal_type*,
4782  buffer_device_type>& permuteFromLIDs,
4783  const CombineMode /*CM*/)
4784  {
4785  using std::endl;
4786  using LO = local_ordinal_type;
4787  using GO = global_ordinal_type;
4788  using this_CRS_type = CrsGraph<LO, GO, node_type>;
4789  const char tfecfFuncName[] = "copyAndPermute: ";
4790  const bool verbose = verbose_;
4791 
4792  std::unique_ptr<std::string> prefix;
4793  if (verbose) {
4794  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4795  std::ostringstream os;
4796  os << *prefix << endl;
4797  std::cerr << os.str ();
4798  }
4799 
4800  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4801  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4802  std::runtime_error, "permuteToLIDs.extent(0) = "
4803  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4804  << permuteFromLIDs.extent (0) << ".");
4805 
4806  // checkSizes() in this file accepts any source unconditionally, so this
4807  // reference cast is the only type check; it throws std::bad_cast on mismatch.
4808  const row_graph_type& srcRowGraph =
4809  dynamic_cast<const row_graph_type&> (source);
4810 
4811  if (verbose) {
4812  std::ostringstream os;
4813  os << *prefix << "Compute padding" << endl;
4814  std::cerr << os.str ();
4815  }
4816  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4817  permuteToLIDs, permuteFromLIDs, verbose);
4818  applyCrsPadding(*padding, verbose);
4819 
4820  // If the source object is actually a CrsGraph, we can use view
4821  // mode instead of copy mode to access the entries in each row,
4822  // if the graph is not fill complete.
4823  const this_CRS_type* srcCrsGraph =
4824  dynamic_cast<const this_CRS_type*> (&source);
4825 
4826  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4827  const map_type& tgtRowMap = *(getRowMap());
4828  const bool src_filled = srcRowGraph.isFillComplete();
// Scratch buffer for copy-mode row access; resized per row below.
4829  nonconst_global_inds_host_view_type row_copy;
4830  LO myid = 0;
4831 
4832  //
4833  // "Copy" part of "copy and permute."
4834  //
4835  if (src_filled || srcCrsGraph == nullptr) {
4836  if (verbose) {
4837  std::ostringstream os;
4838  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4839  std::cerr << os.str ();
4840  }
4841  // If the source graph is fill complete, we can't use view mode,
4842  // because the data might be stored in a different format not
4843  // compatible with the expectations of view mode. Also, if the
4844  // source graph is not a CrsGraph, we can't use view mode,
4845  // because RowGraph only provides copy mode access to the data.
4846  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4847  const GO gid = srcRowMap.getGlobalElement (myid);
4848  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4849  Kokkos::resize(row_copy,row_length);
// check_row_length receives the copied count but is not inspected;
// row_length is what gets inserted.
4850  size_t check_row_length = 0;
4851  srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4852  this->insertGlobalIndices (gid, row_length, row_copy.data());
4853  }
4854  } else {
4855  if (verbose) {
4856  std::ostringstream os;
4857  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4858  std::cerr << os.str ();
4859  }
4860  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4861  const GO gid = srcRowMap.getGlobalElement (myid);
4862  global_inds_host_view_type row;
4863  srcCrsGraph->getGlobalRowView (gid, row);
4864  this->insertGlobalIndices (gid, row.extent(0), row.data());
4865  }
4866  }
4867 
4868  //
4869  // "Permute" part of "copy and permute."
4870  //
4871  auto permuteToLIDs_h = permuteToLIDs.view_host ();
4872  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4873 
// Same copy-mode / view-mode split as above, but each source row is
// inserted into a (generally different) target row.
4874  if (src_filled || srcCrsGraph == nullptr) {
4875  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4876  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4877  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4878  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4879  Kokkos::resize(row_copy,row_length);
4880  size_t check_row_length = 0;
4881  srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4882  this->insertGlobalIndices (mygid, row_length, row_copy.data());
4883  }
4884  } else {
4885  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4886  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4887  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4888  global_inds_host_view_type row;
4889  srcCrsGraph->getGlobalRowView (srcgid, row);
4890  this->insertGlobalIndices (mygid, row.extent(0), row.data());
4891  }
4892  }
4893 
4894  if (verbose) {
4895  std::ostringstream os;
4896  os << *prefix << "Done" << endl;
4897  std::cerr << os.str ();
4898  }
4899  }
4900 
// Apply previously computed padding to this graph's index storage.
//
// \param padding [in] Padding to apply (forwarded to padCrsArrays).
// \param verbose [in] Whether to print debugging output to std::cerr.
//
// Steps, as visible below: allocate indices (globally indexed) if not
// yet allocated; copy the unpacked row pointers into a mutable array;
// build a per-row "end" array; call padCrsArrays on whichever index
// array matches the current indexing mode; refresh k_numRowEntries_ if
// it is in use; and install the new row pointers via
// setRowPtrsUnpacked.
4901  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4902  void
4904  applyCrsPadding(const padding_type& padding,
4905  const bool verbose)
4906  {
4908  using Details::padCrsArrays;
4909  using std::endl;
4910  using LO = local_ordinal_type;
4911  using row_ptrs_type =
4912  typename local_graph_device_type::row_map_type::non_const_type;
4913  using range_policy =
4914  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4915  const char tfecfFuncName[] = "applyCrsPadding";
4916  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4917 
4918  std::unique_ptr<std::string> prefix;
4919  if (verbose) {
4920  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4921  std::ostringstream os;
4922  os << *prefix << "padding: ";
4923  padding.print(os);
4924  os << endl;
4925  std::cerr << os.str();
4926  }
// The rank is only needed for verbose output, so skip the lookup
// (and use -1) when not verbose or when no communicator is available.
4927  const int myRank = ! verbose ? -1 : [&] () {
4928  auto map = this->getMap();
4929  if (map.is_null()) {
4930  return -1;
4931  }
4932  auto comm = map->getComm();
4933  if (comm.is_null()) {
4934  return -1;
4935  }
4936  return comm->getRank();
4937  } ();
4938 
4939  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4940  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4941  // size needs to increase. That should be the job of
4942  // padCrsArrays.
4943 
4944  // Assume global indexing if we don't have any indices yet
4945  if (! indicesAreAllocated()) {
4946  if (verbose) {
4947  std::ostringstream os;
4948  os << *prefix << "Call allocateIndices" << endl;
4949  std::cerr << os.str();
4950  }
4951  allocateIndices(GlobalIndices, verbose);
4952  }
4953  TEUCHOS_ASSERT( indicesAreAllocated() );
4954 
4955  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4956  // would use it directly.
4957 
4958  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4959  if (verbose) {
4960  std::ostringstream os;
4961  os << *prefix << "Allocate row_ptrs_beg: "
4962  << rowPtrsUnpacked_dev.extent(0) << endl;
4963  std::cerr << os.str();
4964  }
4965  using Kokkos::view_alloc;
4966  using Kokkos::WithoutInitializing;
4967  row_ptrs_type row_ptrs_beg(
4968  view_alloc("row_ptrs_beg", WithoutInitializing),
4969  rowPtrsUnpacked_dev.extent(0));
4970  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4971  Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev);
4972 
// N is the number of rows: one less than the row-pointer length, or
// zero for an empty row-pointer array.
4973  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4974  size_t(row_ptrs_beg.extent(0) - 1);
4975  if (verbose) {
4976  std::ostringstream os;
4977  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4978  std::cerr << os.str();
4979  }
4980  row_ptrs_type row_ptrs_end(
4981  view_alloc("row_ptrs_end", WithoutInitializing), N);
4982  row_ptrs_type num_row_entries;
4983 
// A nonempty k_numRowEntries_ means per-row counts are tracked
// separately from the row pointers and must be kept in sync.
4984  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4985 
4986  execution_space().fence(); // we need above deep_copy to be done
4987 
4988  if (refill_num_row_entries) { // Case 1: Unpacked storage
4989  // We can't assume correct *this capture until C++17, and it's
4990  // likely more efficient just to capture what we need anyway.
4991  num_row_entries =
4992  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4993  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4994  Kokkos::parallel_for
4995  ("Fill end row pointers", range_policy(0, N),
4996  KOKKOS_LAMBDA (const size_t i) {
4997  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4998  });
4999  }
5000  else {
5001  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5002  // storage, we don't need row_ptr_end to be separate allocation;
5003  // could just have it alias row_ptr_beg+1.
5004  Kokkos::parallel_for
5005  ("Fill end row pointers", range_policy(0, N),
5006  KOKKOS_LAMBDA (const size_t i) {
5007  row_ptrs_end(i) = row_ptrs_beg(i+1);
5008  });
5009  }
5010 
5011  if (isGloballyIndexed()) {
5012  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5013  padding, myRank, verbose);
5014  }
5015  else {
5016  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5017  padding, myRank, verbose);
5018  }
5019 
5020  if (refill_num_row_entries) {
5021  Kokkos::parallel_for
5022  ("Fill num entries", range_policy(0, N),
5023  KOKKOS_LAMBDA (const size_t i) {
5024  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5025  });
5026  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5027  }
5028  if (verbose) {
5029  std::ostringstream os;
5030  os << *prefix << "Reassign k_rowPtrs_; old size: "
5031  << rowPtrsUnpacked_dev.extent(0) << ", new size: "
5032  << row_ptrs_beg.extent(0) << endl;
5033  std::cerr << os.str();
// NOTE(review): this size check only runs when verbose is true --
// confirm whether it was meant to be unconditional.
5034  TEUCHOS_ASSERT( rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0) );
5035  }
5036 
5037  setRowPtrsUnpacked(row_ptrs_beg);
5038  }
5039 
5040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5041  std::unique_ptr<
5042  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5043  >
5044  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5045  computeCrsPadding(
5046  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5047  const size_t numSameIDs,
5048  const Kokkos::DualView<const local_ordinal_type*,
5049  buffer_device_type>& permuteToLIDs,
5050  const Kokkos::DualView<const local_ordinal_type*,
5051  buffer_device_type>& permuteFromLIDs,
5052  const bool verbose) const
5053  {
5054  using LO = local_ordinal_type;
5055  using std::endl;
5056 
5057  std::unique_ptr<std::string> prefix;
5058  if (verbose) {
5059  prefix = this->createPrefix("CrsGraph",
5060  "computeCrsPadding(same & permute)");
5061  std::ostringstream os;
5062  os << *prefix << "{numSameIDs: " << numSameIDs
5063  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5064  << endl;
5065  std::cerr << os.str();
5066  }
5067 
5068  const int myRank = [&] () {
5069  auto comm = rowMap_.is_null() ? Teuchos::null :
5070  rowMap_->getComm();
5071  return comm.is_null() ? -1 : comm->getRank();
5072  } ();
5073  std::unique_ptr<padding_type> padding(
5074  new padding_type(myRank, numSameIDs,
5075  permuteFromLIDs.extent(0)));
5076 
5077  computeCrsPaddingForSameIDs(*padding, source,
5078  static_cast<LO>(numSameIDs));
5079  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5080  permuteFromLIDs);
5081  return padding;
5082  }
5083 
5084  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5085  void
5086  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5087  computeCrsPaddingForSameIDs(
5088  padding_type& padding,
5089  const RowGraph<local_ordinal_type, global_ordinal_type,
5090  node_type>& source,
5091  const local_ordinal_type numSameIDs) const
5092  {
5093  using LO = local_ordinal_type;
5094  using GO = global_ordinal_type;
5095  using Details::Impl::getRowGraphGlobalRow;
5096  using std::endl;
5097  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5098 
5099  std::unique_ptr<std::string> prefix;
5100  const bool verbose = verbose_;
5101  if (verbose) {
5102  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5103  std::ostringstream os;
5104  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5105  std::cerr << os.str();
5106  }
5107 
5108  if (numSameIDs == 0) {
5109  return;
5110  }
5111 
5112  const map_type& srcRowMap = *(source.getRowMap());
5113  const map_type& tgtRowMap = *rowMap_;
5114  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5115  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5116  const bool src_is_unique =
5117  srcCrs == nullptr ? false : srcCrs->isMerged();
5118  const bool tgt_is_unique = this->isMerged();
5119 
5120  std::vector<GO> srcGblColIndsScratch;
5121  std::vector<GO> tgtGblColIndsScratch;
5122 
5123  execute_sync_host_uvm_access(); // protect host UVM access
5124  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5125  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5126  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5127  auto srcGblColInds = getRowGraphGlobalRow(
5128  srcGblColIndsScratch, source, srcGblRowInd);
5129  auto tgtGblColInds = getRowGraphGlobalRow(
5130  tgtGblColIndsScratch, *this, tgtGblRowInd);
5131  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5132  tgtGblColInds.size(), tgt_is_unique,
5133  srcGblColInds.getRawPtr(),
5134  srcGblColInds.size(), src_is_unique);
5135  }
5136  if (verbose) {
5137  std::ostringstream os;
5138  os << *prefix << "Done" << endl;
5139  std::cerr << os.str();
5140  }
5141  }
5142 
5143  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5144  void
5145  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5146  computeCrsPaddingForPermutedIDs(
5147  padding_type& padding,
5148  const RowGraph<local_ordinal_type, global_ordinal_type,
5149  node_type>& source,
5150  const Kokkos::DualView<const local_ordinal_type*,
5151  buffer_device_type>& permuteToLIDs,
5152  const Kokkos::DualView<const local_ordinal_type*,
5153  buffer_device_type>& permuteFromLIDs) const
5154  {
5155  using LO = local_ordinal_type;
5156  using GO = global_ordinal_type;
5157  using Details::Impl::getRowGraphGlobalRow;
5158  using std::endl;
5159  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5160 
5161  std::unique_ptr<std::string> prefix;
5162  const bool verbose = verbose_;
5163  if (verbose) {
5164  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5165  std::ostringstream os;
5166  os << *prefix << "permuteToLIDs.extent(0): "
5167  << permuteToLIDs.extent(0)
5168  << ", permuteFromLIDs.extent(0): "
5169  << permuteFromLIDs.extent(0) << endl;
5170  std::cerr << os.str();
5171  }
5172 
5173  if (permuteToLIDs.extent(0) == 0) {
5174  return;
5175  }
5176 
5177  const map_type& srcRowMap = *(source.getRowMap());
5178  const map_type& tgtRowMap = *rowMap_;
5179  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5180  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5181  const bool src_is_unique =
5182  srcCrs == nullptr ? false : srcCrs->isMerged();
5183  const bool tgt_is_unique = this->isMerged();
5184 
5185  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5186  auto permuteToLIDs_h = permuteToLIDs.view_host();
5187  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5188  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5189 
5190  std::vector<GO> srcGblColIndsScratch;
5191  std::vector<GO> tgtGblColIndsScratch;
5192  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5193 
5194  execute_sync_host_uvm_access(); // protect host UVM access
5195  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5196  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5197  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5198  auto srcGblColInds = getRowGraphGlobalRow(
5199  srcGblColIndsScratch, source, srcGblRowInd);
5200  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5201  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5202  auto tgtGblColInds = getRowGraphGlobalRow(
5203  tgtGblColIndsScratch, *this, tgtGblRowInd);
5204  padding.update_permute(whichPermute, tgtLclRowInd,
5205  tgtGblColInds.getRawPtr(),
5206  tgtGblColInds.size(), tgt_is_unique,
5207  srcGblColInds.getRawPtr(),
5208  srcGblColInds.size(), src_is_unique);
5209  }
5210 
5211  if (verbose) {
5212  std::ostringstream os;
5213  os << *prefix << "Done" << endl;
5214  std::cerr << os.str();
5215  }
5216  }
5217 
5218  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5219  std::unique_ptr<
5220  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5221  >
5222  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5223  computeCrsPaddingForImports(
5224  const Kokkos::DualView<const local_ordinal_type*,
5225  buffer_device_type>& importLIDs,
5226  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5227  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5228  const bool verbose) const
5229  {
5230  using Details::Impl::getRowGraphGlobalRow;
5231  using std::endl;
5232  using LO = local_ordinal_type;
5233  using GO = global_ordinal_type;
5234  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5235 
5236  std::unique_ptr<std::string> prefix;
5237  if (verbose) {
5238  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5239  std::ostringstream os;
5240  os << *prefix << "importLIDs.extent(0): "
5241  << importLIDs.extent(0)
5242  << ", imports.extent(0): "
5243  << imports.extent(0)
5244  << ", numPacketsPerLID.extent(0): "
5245  << numPacketsPerLID.extent(0) << endl;
5246  std::cerr << os.str();
5247  }
5248 
5249  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5250  const int myRank = [&] () {
5251  auto comm = rowMap_.is_null() ? Teuchos::null :
5252  rowMap_->getComm();
5253  return comm.is_null() ? -1 : comm->getRank();
5254  } ();
5255  std::unique_ptr<padding_type> padding(
5256  new padding_type(myRank, numImports));
5257 
5258  if (imports.need_sync_host()) {
5259  imports.sync_host();
5260  }
5261  auto imports_h = imports.view_host();
5262  if (numPacketsPerLID.need_sync_host ()) {
5263  numPacketsPerLID.sync_host();
5264  }
5265  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5266 
5267  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5268  auto importLIDs_h = importLIDs.view_host();
5269 
5270  const map_type& tgtRowMap = *rowMap_;
5271  // Always merge source column indices, since isMerged() is
5272  // per-process state, and we don't know its value on other
5273  // processes that sent us data.
5274  constexpr bool src_is_unique = false;
5275  const bool tgt_is_unique = isMerged();
5276 
5277  std::vector<GO> tgtGblColIndsScratch;
5278  size_t offset = 0;
5279  execute_sync_host_uvm_access(); // protect host UVM access
5280  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5281  // CrsGraph packs just global column indices, while CrsMatrix
5282  // packs bytes (first the number of entries in the row, then the
5283  // global column indices, then other stuff like the matrix
5284  // values in that row).
5285  const LO origSrcNumEnt =
5286  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5287  GO* const srcGblColInds = imports_h.data() + offset;
5288 
5289  const LO tgtLclRowInd = importLIDs_h[whichImport];
5290  const GO tgtGblRowInd =
5291  tgtRowMap.getGlobalElement(tgtLclRowInd);
5292  auto tgtGblColInds = getRowGraphGlobalRow(
5293  tgtGblColIndsScratch, *this, tgtGblRowInd);
5294  const size_t origTgtNumEnt(tgtGblColInds.size());
5295 
5296  padding->update_import(whichImport, tgtLclRowInd,
5297  tgtGblColInds.getRawPtr(),
5298  origTgtNumEnt, tgt_is_unique,
5299  srcGblColInds,
5300  origSrcNumEnt, src_is_unique);
5301  offset += origSrcNumEnt;
5302  }
5303 
5304  if (verbose) {
5305  std::ostringstream os;
5306  os << *prefix << "Done" << endl;
5307  std::cerr << os.str();
5308  }
5309  return padding;
5310  }
5311 
5312  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5313  std::unique_ptr<
5314  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5315  >
5316  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5317  computePaddingForCrsMatrixUnpack(
5318  const Kokkos::DualView<const local_ordinal_type*,
5319  buffer_device_type>& importLIDs,
5320  Kokkos::DualView<char*, buffer_device_type> imports,
5321  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5322  const bool verbose) const
5323  {
5324  using Details::Impl::getRowGraphGlobalRow;
5325  using Details::PackTraits;
5326  using std::endl;
5327  using LO = local_ordinal_type;
5328  using GO = global_ordinal_type;
5329  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5330 
5331  std::unique_ptr<std::string> prefix;
5332  if (verbose) {
5333  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5334  std::ostringstream os;
5335  os << *prefix << "importLIDs.extent(0): "
5336  << importLIDs.extent(0)
5337  << ", imports.extent(0): "
5338  << imports.extent(0)
5339  << ", numPacketsPerLID.extent(0): "
5340  << numPacketsPerLID.extent(0) << endl;
5341  std::cerr << os.str();
5342  }
5343  const bool extraVerbose =
5344  verbose && Details::Behavior::verbose("CrsPadding");
5345 
5346  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5347  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5348  const int myRank = [&] () {
5349  auto comm = rowMap_.is_null() ? Teuchos::null :
5350  rowMap_->getComm();
5351  return comm.is_null() ? -1 : comm->getRank();
5352  } ();
5353  std::unique_ptr<padding_type> padding(
5354  new padding_type(myRank, numImports));
5355 
5356  if (imports.need_sync_host()) {
5357  imports.sync_host();
5358  }
5359  auto imports_h = imports.view_host();
5360  if (numPacketsPerLID.need_sync_host ()) {
5361  numPacketsPerLID.sync_host();
5362  }
5363  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5364 
5365  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5366  auto importLIDs_h = importLIDs.view_host();
5367 
5368  const map_type& tgtRowMap = *rowMap_;
5369  // Always merge source column indices, since isMerged() is
5370  // per-process state, and we don't know its value on other
5371  // processes that sent us data.
5372  constexpr bool src_is_unique = false;
5373  const bool tgt_is_unique = isMerged();
5374 
5375  std::vector<GO> srcGblColIndsScratch;
5376  std::vector<GO> tgtGblColIndsScratch;
5377  size_t offset = 0;
5378  execute_sync_host_uvm_access(); // protect host UVM access
5379  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5380  // CrsGraph packs just global column indices, while CrsMatrix
5381  // packs bytes (first the number of entries in the row, then the
5382  // global column indices, then other stuff like the matrix
5383  // values in that row).
5384  const size_t numBytes = numPacketsPerLID_h[whichImport];
5385  if (extraVerbose) {
5386  std::ostringstream os;
5387  os << *prefix << "whichImport=" << whichImport
5388  << ", numImports=" << numImports
5389  << ", numBytes=" << numBytes << endl;
5390  std::cerr << os.str();
5391  }
5392  if (numBytes == 0) {
5393  continue; // special case: no entries to unpack for this row
5394  }
5395  LO origSrcNumEnt = 0;
5396  const size_t numEntBeg = offset;
5397  const size_t numEntLen =
5398  PackTraits<LO>::packValueCount(origSrcNumEnt);
5399  TEUCHOS_ASSERT( numBytes >= numEntLen );
5400  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5401  PackTraits<LO>::unpackValue(origSrcNumEnt,
5402  imports_h.data() + numEntBeg);
5403  if (extraVerbose) {
5404  std::ostringstream os;
5405  os << *prefix << "whichImport=" << whichImport
5406  << ", numImports=" << numImports
5407  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5408  std::cerr << os.str();
5409  }
5410  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5411  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5412  const size_t gidsBeg = numEntBeg + numEntLen;
5413  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5414  srcGblColIndsScratch.resize(origSrcNumEnt);
5415  }
5416  GO* const srcGblColInds = srcGblColIndsScratch.data();
5417  PackTraits<GO>::unpackArray(srcGblColInds,
5418  imports_h.data() + gidsBeg,
5419  origSrcNumEnt);
5420  const LO tgtLclRowInd = importLIDs_h[whichImport];
5421  const GO tgtGblRowInd =
5422  tgtRowMap.getGlobalElement(tgtLclRowInd);
5423  auto tgtGblColInds = getRowGraphGlobalRow(
5424  tgtGblColIndsScratch, *this, tgtGblRowInd);
5425  const size_t origNumTgtEnt(tgtGblColInds.size());
5426 
5427  if (extraVerbose) {
5428  std::ostringstream os;
5429  os << *prefix << "whichImport=" << whichImport
5430  << ", numImports=" << numImports
5431  << ": Call padding->update_import" << endl;
5432  std::cerr << os.str();
5433  }
5434  padding->update_import(whichImport, tgtLclRowInd,
5435  tgtGblColInds.getRawPtr(),
5436  origNumTgtEnt, tgt_is_unique,
5437  srcGblColInds,
5438  origSrcNumEnt, src_is_unique);
5439  offset += numBytes;
5440  }
5441 
5442  if (verbose) {
5443  std::ostringstream os;
5444  os << *prefix << "Done" << endl;
5445  std::cerr << os.str();
5446  }
5447  return padding;
5448  }
5449 
5450  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5451  void
5452  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5453  packAndPrepare
5454  (const SrcDistObject& source,
5455  const Kokkos::DualView<const local_ordinal_type*,
5456  buffer_device_type>& exportLIDs,
5457  Kokkos::DualView<packet_type*,
5458  buffer_device_type>& exports,
5459  Kokkos::DualView<size_t*,
5460  buffer_device_type> numPacketsPerLID,
5461  size_t& constantNumPackets)
5462  {
5464  using GO = global_ordinal_type;
5465  using std::endl;
5466  using crs_graph_type =
5467  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5468  const char tfecfFuncName[] = "packAndPrepare: ";
5469  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5470 
5471  const bool verbose = verbose_;
5472  std::unique_ptr<std::string> prefix;
5473  if (verbose) {
5474  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5475  std::ostringstream os;
5476  os << *prefix << "Start" << endl;
5477  std::cerr << os.str();
5478  }
5479 
5480  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5481  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5482  std::runtime_error,
5483  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5484  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5485  << ".");
5486  const row_graph_type* srcRowGraphPtr =
5487  dynamic_cast<const row_graph_type*> (&source);
5488  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5489  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5490  "or Import operation to a CrsGraph must be a RowGraph with the same "
5491  "template parameters.");
5492  // We don't check whether src_graph has had fillComplete called,
5493  // because it doesn't matter whether the *source* graph has been
5494  // fillComplete'd. The target graph can not be fillComplete'd yet.
5495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5496  (this->isFillComplete (), std::runtime_error,
5497  "The target graph of an Import or Export must not be fill complete.");
5498 
5499  const crs_graph_type* srcCrsGraphPtr =
5500  dynamic_cast<const crs_graph_type*> (&source);
5501 
5502  if (srcCrsGraphPtr == nullptr) {
5503  using Teuchos::ArrayView;
5504  using LO = local_ordinal_type;
5505 
5506  if (verbose) {
5507  std::ostringstream os;
5508  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5509  << endl;
5510  std::cerr << os.str();
5511  }
5512  // RowGraph::pack serves the "old" DistObject interface. It
5513  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5514  // entails deep-copying the exports buffer on output. RowGraph
5515  // is a convenience interface when not a CrsGraph, so we accept
5516  // the performance hit.
5517  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5518  auto exportLIDs_h = exportLIDs.view_host ();
5519  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5520  exportLIDs_h.extent (0));
5521  Teuchos::Array<GO> exports_a;
5522 
5523  numPacketsPerLID.clear_sync_state ();
5524  numPacketsPerLID.modify_host ();
5525  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5526  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5527  numPacketsPerLID_h.extent (0));
5528  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5529  constantNumPackets);
5530  const size_t newSize = static_cast<size_t> (exports_a.size ());
5531  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5532  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5533  exports = exports_dv_type ("exports", newSize);
5534  }
5535  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5536  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5537  exports.clear_sync_state ();
5538  exports.modify_host ();
5539  // DEEP_COPY REVIEW - NOT TESTED
5540  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5541  }
5542  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5543  else if (! getColMap ().is_null () &&
5544  (this->getRowPtrsPackedDevice().extent (0) != 0 ||
5545  getRowMap ()->getLocalNumElements () == 0)) {
5546  if (verbose) {
5547  std::ostringstream os;
5548  os << *prefix << "packCrsGraphNew path" << endl;
5549  std::cerr << os.str();
5550  }
5551  using export_pids_type =
5552  Kokkos::DualView<const int*, buffer_device_type>;
5553  export_pids_type exportPIDs; // not filling it; needed for syntax
5554  using LO = local_ordinal_type;
5555  using NT = node_type;
5557  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5558  exports, numPacketsPerLID,
5559  constantNumPackets, false);
5560  }
5561  else {
5562  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5563  constantNumPackets);
5564  }
5565 
5566  if (verbose) {
5567  std::ostringstream os;
5568  os << *prefix << "Done" << endl;
5569  std::cerr << os.str();
5570  }
5571  }
5572 
5573  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5574  void
5576  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5577  Teuchos::Array<GlobalOrdinal>& exports,
5578  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5579  size_t& constantNumPackets) const
5580  {
5581  auto col_map = this->getColMap();
5582  // packCrsGraph requires k_rowPtrsPacked to be set
5583  if( !col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5585  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5586  exportLIDs, constantNumPackets);
5587  }
5588  else {
5589  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5590  constantNumPackets);
5591  }
5592  }
5593 
5594  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5595  void
5597  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5598  Teuchos::Array<GlobalOrdinal>& exports,
5599  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5600  size_t& constantNumPackets) const
5601  {
5602  using std::endl;
5603  using LO = LocalOrdinal;
5604  using GO = GlobalOrdinal;
5605  using host_execution_space =
5606  typename Kokkos::View<size_t*, device_type>::
5607  HostMirror::execution_space;
5608  const char tfecfFuncName[] = "packFillActive: ";
5609  const bool verbose = verbose_;
5610 
5611  const auto numExportLIDs = exportLIDs.size ();
5612  std::unique_ptr<std::string> prefix;
5613  if (verbose) {
5614  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5615  std::ostringstream os;
5616  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5617  std::cerr << os.str();
5618  }
5619  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5620  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5621  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5622  " = " << numPacketsPerLID.size () << ".");
5623 
5624  const map_type& rowMap = * (this->getRowMap ());
5625  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5627  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5628  "This graph claims to be locally indexed, but its column Map is nullptr. "
5629  "This should never happen. Please report this bug to the Tpetra "
5630  "developers.");
5631 
5632  // We may pack different amounts of data for different rows.
5633  constantNumPackets = 0;
5634 
5635  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5636  // it might be now, but we might as well be safe).
5637  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5638  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5639 
5640  // Count the total number of packets (column indices, in the case
5641  // of a CrsGraph) to pack. While doing so, set
5642  // numPacketsPerLID[i] to the number of entries owned by the
5643  // calling process in (local) row exportLIDs[i] of the graph, that
5644  // the caller wants us to send out.
5645  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5646  size_t totalNumPackets = 0;
5647  size_t errCount = 0;
5648  // lambdas turn what they capture const, so we can't
5649  // atomic_add(&errCount,1). Instead, we need a View to modify.
5650  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5651  host_device_type;
5652  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5653  constexpr size_t ONE = 1;
5654 
5655  execute_sync_host_uvm_access(); // protect host UVM access
5656  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5657  inputRange,
5658  [=] (const LO& i, size_t& curTotalNumPackets) {
5659  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5660  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5661  Kokkos::atomic_add (&errCountView(), ONE);
5662  numPacketsPerLID_raw[i] = 0;
5663  }
5664  else {
5665  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5666  numPacketsPerLID_raw[i] = numEnt;
5667  curTotalNumPackets += numEnt;
5668  }
5669  },
5670  totalNumPackets);
5671 
5672  if (verbose) {
5673  std::ostringstream os;
5674  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5675  std::cerr << os.str();
5676  }
5677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5678  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5679  "one or more errors! errCount = " << errCount
5680  << ", totalNumPackets = " << totalNumPackets << ".");
5681  errCount = 0;
5682 
5683  // Allocate space for all the column indices to pack.
5684  exports.resize (totalNumPackets);
5685 
5686  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5687  (! this->supportsRowViews (), std::logic_error,
5688  "this->supportsRowViews() returns false; this should never happen. "
5689  "Please report this bug to the Tpetra developers.");
5690 
5691  // Loop again over the rows to export, and pack rows of indices
5692  // into the output buffer.
5693 
5694  if (verbose) {
5695  std::ostringstream os;
5696  os << *prefix << "Pack into exports" << endl;
5697  std::cerr << os.str();
5698  }
5699 
5700  // Teuchos::ArrayView may not be thread safe, or may not be
5701  // efficiently thread safe. Better to use the raw pointer.
5702  GO* const exports_raw = exports.getRawPtr ();
5703  errCount = 0;
5704  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5705  inputRange, [=, &prefix]
5706  (const LO i, size_t& exportsOffset, const bool final) {
5707  const size_t curOffset = exportsOffset;
5708  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5709  const RowInfo rowInfo =
5710  this->getRowInfoFromGlobalRowIndex (gblRow);
5711 
5712  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5713  if (rowInfo.localRow == TDO::invalid ()) {
5714  if (verbose) {
5715  std::ostringstream os;
5716  os << *prefix << ": INVALID rowInfo: i=" << i
5717  << ", lclRow=" << exportLIDs_raw[i] << endl;
5718  std::cerr << os.str();
5719  }
5720  Kokkos::atomic_add (&errCountView(), ONE);
5721  }
5722  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5723  if (verbose) {
5724  std::ostringstream os;
5725  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5726  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5727  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5728  << ") > totalNumPackets (= " << totalNumPackets << ")."
5729  << endl;
5730  std::cerr << os.str();
5731  }
5732  Kokkos::atomic_add (&errCountView(), ONE);
5733  }
5734  else {
5735  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5736  if (this->isLocallyIndexed ()) {
5737  auto lclColInds = getLocalIndsViewHost (rowInfo);
5738  if (final) {
5739  for (LO k = 0; k < numEnt; ++k) {
5740  const LO lclColInd = lclColInds(k);
5741  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5742  // Pack it, even if it's wrong. Let the receiving
5743  // process deal with it. Otherwise, we'll miss out
5744  // on any correct data.
5745  exports_raw[curOffset + k] = gblColInd;
5746  } // for each entry in the row
5747  } // final pass?
5748  exportsOffset = curOffset + numEnt;
5749  }
5750  else if (this->isGloballyIndexed ()) {
5751  auto gblColInds = getGlobalIndsViewHost (rowInfo);
5752  if (final) {
5753  for (LO k = 0; k < numEnt; ++k) {
5754  const GO gblColInd = gblColInds(k);
5755  // Pack it, even if it's wrong. Let the receiving
5756  // process deal with it. Otherwise, we'll miss out
5757  // on any correct data.
5758  exports_raw[curOffset + k] = gblColInd;
5759  } // for each entry in the row
5760  } // final pass?
5761  exportsOffset = curOffset + numEnt;
5762  }
5763  // If neither globally nor locally indexed, then the graph
5764  // has no entries in this row (or indeed, in any row on this
5765  // process) to pack.
5766  }
5767  });
5768 
5769  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5770  (errCount != 0, std::logic_error, "Packing encountered "
5771  "one or more errors! errCount = " << errCount
5772  << ", totalNumPackets = " << totalNumPackets << ".");
5773 
5774  if (verbose) {
5775  std::ostringstream os;
5776  os << *prefix << "Done" << endl;
5777  std::cerr << os.str();
5778  }
5779  }
5780 
5781  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5782  void
5783  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5784  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5785  buffer_device_type>& exportLIDs,
5786  Kokkos::DualView<packet_type*,
5787  buffer_device_type>& exports,
5788  Kokkos::DualView<size_t*,
5789  buffer_device_type> numPacketsPerLID,
5790  size_t& constantNumPackets) const
5791  {
5792  using std::endl;
5793  using LO = local_ordinal_type;
5794  using GO = global_ordinal_type;
5795  using host_execution_space = typename Kokkos::View<size_t*,
5796  device_type>::HostMirror::execution_space;
5797  using host_device_type =
5798  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5799  using exports_dv_type =
5800  Kokkos::DualView<packet_type*, buffer_device_type>;
5801  const char tfecfFuncName[] = "packFillActiveNew: ";
5802  const bool verbose = verbose_;
5803 
5804  const auto numExportLIDs = exportLIDs.extent (0);
5805  std::unique_ptr<std::string> prefix;
5806  if (verbose) {
5807  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5808  std::ostringstream os;
5809  os << *prefix << "numExportLIDs: " << numExportLIDs
5810  << ", numPacketsPerLID.extent(0): "
5811  << numPacketsPerLID.extent(0) << endl;
5812  std::cerr << os.str();
5813  }
5814  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5815  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5816  "exportLIDs.extent(0) = " << numExportLIDs
5817  << " != numPacketsPerLID.extent(0) = "
5818  << numPacketsPerLID.extent (0) << ".");
5819  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5820  auto exportLIDs_h = exportLIDs.view_host ();
5821 
5822  const map_type& rowMap = * (this->getRowMap ());
5823  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5825  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5826  "This graph claims to be locally indexed, but its column Map is nullptr. "
5827  "This should never happen. Please report this bug to the Tpetra "
5828  "developers.");
5829 
5830  // We may pack different amounts of data for different rows.
5831  constantNumPackets = 0;
5832 
5833  numPacketsPerLID.clear_sync_state ();
5834  numPacketsPerLID.modify_host ();
5835  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5836 
5837  // Count the total number of packets (column indices, in the case
5838  // of a CrsGraph) to pack. While doing so, set
5839  // numPacketsPerLID[i] to the number of entries owned by the
5840  // calling process in (local) row exportLIDs[i] of the graph, that
5841  // the caller wants us to send out.
5842  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5843  range_type inputRange (0, numExportLIDs);
5844  size_t totalNumPackets = 0;
5845  size_t errCount = 0;
5846  // lambdas turn what they capture const, so we can't
5847  // atomic_add(&errCount,1). Instead, we need a View to modify.
5848  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5849  constexpr size_t ONE = 1;
5850 
5851  if (verbose) {
5852  std::ostringstream os;
5853  os << *prefix << "Compute totalNumPackets" << endl;
5854  std::cerr << os.str ();
5855  }
5856 
5857  execute_sync_host_uvm_access(); // protect host UVM access
5858  totalNumPackets = 0;
5859  for (size_t i=0; i<numExportLIDs; ++i) {
5860  const LO lclRow = exportLIDs_h[i];
5861  const GO gblRow = rowMap.getGlobalElement (lclRow);
5862  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5863  if (verbose) {
5864  std::ostringstream os;
5865  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5866  << " not in row Map on this process" << endl;
5867  std::cerr << os.str();
5868  }
5869  Kokkos::atomic_add (&errCountView(), ONE);
5870  numPacketsPerLID_h(i) = 0;
5871  }
5872  else {
5873  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5874  numPacketsPerLID_h(i) = numEnt;
5875  totalNumPackets += numEnt;
5876  }
5877  }
5878 
5879  if (verbose) {
5880  std::ostringstream os;
5881  os << *prefix << "totalNumPackets: " << totalNumPackets
5882  << ", errCount: " << errCount << endl;
5883  std::cerr << os.str ();
5884  }
5885  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5886  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5887  "one or more errors! totalNumPackets: " << totalNumPackets
5888  << ", errCount: " << errCount << ".");
5889 
5890  // Allocate space for all the column indices to pack.
5891  if (size_t(exports.extent (0)) < totalNumPackets) {
5892  // FIXME (mfh 09 Apr 2019) Create without initializing.
5893  exports = exports_dv_type ("exports", totalNumPackets);
5894  }
5895 
5896  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5897  (! this->supportsRowViews (), std::logic_error,
5898  "this->supportsRowViews() returns false; this should never happen. "
5899  "Please report this bug to the Tpetra developers.");
5900 
5901  // Loop again over the rows to export, and pack rows of indices
5902  // into the output buffer.
5903 
5904  if (verbose) {
5905  std::ostringstream os;
5906  os << *prefix << "Pack into exports buffer" << endl;
5907  std::cerr << os.str();
5908  }
5909 
5910  exports.clear_sync_state ();
5911  exports.modify_host ();
5912  auto exports_h = exports.view_host ();
5913 
5914  errCount = 0;
5915 
5916  // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5917  // (if locally indexed) or gblInds_wdv (if globally indexed).
5918  if(isLocallyIndexed())
5919  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5920  else if(isGloballyIndexed())
5921  gblInds_wdv.getHostView(Access::ReadOnly);
5922 
5924  Kokkos::parallel_scan
5925  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5926  inputRange, [=, &prefix]
5927  (const LO i, size_t& exportsOffset, const bool final) {
5928  const size_t curOffset = exportsOffset;
5929  const LO lclRow = exportLIDs_h(i);
5930  const GO gblRow = rowMap.getGlobalElement (lclRow);
5931  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5932  if (verbose) {
5933  std::ostringstream os;
5934  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5935  << " not in row Map on this process" << endl;
5936  std::cerr << os.str();
5937  }
5938  Kokkos::atomic_add (&errCountView(), ONE);
5939  return;
5940  }
5941 
5942  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5943  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5944  if (verbose) {
5945  std::ostringstream os;
5946  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5947  << ", gblRow=" << gblRow << ": invalid rowInfo"
5948  << endl;
5949  std::cerr << os.str();
5950  }
5951  Kokkos::atomic_add (&errCountView(), ONE);
5952  return;
5953  }
5954 
5955  if (curOffset + rowInfo.numEntries > totalNumPackets) {
5956  if (verbose) {
5957  std::ostringstream os;
5958  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5959  << ", gblRow=" << gblRow << ", curOffset (= "
5960  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5961  << ") > totalNumPackets (= " << totalNumPackets
5962  << ")." << endl;
5963  std::cerr << os.str();
5964  }
5965  Kokkos::atomic_add (&errCountView(), ONE);
5966  return;
5967  }
5968 
5969  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5970  if (this->isLocallyIndexed ()) {
5971  auto lclColInds = getLocalIndsViewHost(rowInfo);
5972  if (final) {
5973  for (LO k = 0; k < numEnt; ++k) {
5974  const LO lclColInd = lclColInds(k);
5975  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5976  // Pack it, even if it's wrong. Let the receiving
5977  // process deal with it. Otherwise, we'll miss out
5978  // on any correct data.
5979  exports_h(curOffset + k) = gblColInd;
5980  } // for each entry in the row
5981  } // final pass?
5982  exportsOffset = curOffset + numEnt;
5983  }
5984  else if (this->isGloballyIndexed ()) {
5985  auto gblColInds = getGlobalIndsViewHost(rowInfo);
5986  if (final) {
5987  for (LO k = 0; k < numEnt; ++k) {
5988  const GO gblColInd = gblColInds(k);
5989  // Pack it, even if it's wrong. Let the receiving
5990  // process deal with it. Otherwise, we'll miss out
5991  // on any correct data.
5992  exports_h(curOffset + k) = gblColInd;
5993  } // for each entry in the row
5994  } // final pass?
5995  exportsOffset = curOffset + numEnt;
5996  }
5997  // If neither globally nor locally indexed, then the graph
5998  // has no entries in this row (or indeed, in any row on this
5999  // process) to pack.
6000  });
6002 
6003  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6004  // (errCount != 0, std::logic_error, "Packing encountered "
6005  // "one or more errors! errCount = " << errCount
6006  // << ", totalNumPackets = " << totalNumPackets << ".");
6007 
6008  if (verbose) {
6009  std::ostringstream os;
6010  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6011  std::cerr << os.str();
6012  }
6013  }
6014 
// Unpack received column indices and insert them into the rows of this
// graph named by importLIDs.
//
// NOTE(review): this listing omits the declarator line between "void" and
// the parameter list (listing lines 6017-6018) and listing line 6028.
// Judging by tfecfFuncName below this is presumably
// CrsGraph<...>::unpackAndCombine -- confirm against the real source.
//
// importLIDs: local row indices to unpack into.  Its host copy must
//   already be current (asserted below).
// imports: packed global column indices, laid out row after row in the
//   same order as importLIDs.  Synced to host here if needed.
// numPacketsPerLID: number of column indices received for each entry of
//   importLIDs.  Synced to host here if needed.
// The last two arguments (constantNumPackets, combineMode) are unnamed and
// therefore unused.
//
// Raises std::runtime_error if importLIDs and numPacketsPerLID differ in
// length, if the graph is fill complete, or if anything in the insert loop
// throws a std::exception (the original what() is appended).
6015  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6016  void
6019  (const Kokkos::DualView<const local_ordinal_type*,
6020  buffer_device_type>& importLIDs,
6021  Kokkos::DualView<packet_type*,
6022  buffer_device_type> imports,
6023  Kokkos::DualView<size_t*,
6024  buffer_device_type> numPacketsPerLID,
6025  const size_t /* constantNumPackets */,
6026  const CombineMode /* combineMode */ )
6027  {
6029  using std::endl;
6030  using LO = local_ordinal_type;
6031  using GO = global_ordinal_type;
6032  const char tfecfFuncName[] = "unpackAndCombine";
6033 
6034  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6035  const bool verbose = verbose_;
6036 
6037  std::unique_ptr<std::string> prefix;
6038  if (verbose) {
6039  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6040  std::ostringstream os;
6041  os << *prefix << "Start" << endl;
6042  std::cerr << os.str ();
6043  }
// Compute and apply CRS padding for the incoming data before any insert.
// NOTE(review): presumably this grows row storage up front so the inserts
// below have room -- confirm in computeCrsPaddingForImports.
6044  {
6045  auto padding = computeCrsPaddingForImports(
6046  importLIDs, imports, numPacketsPerLID, verbose);
6047  applyCrsPadding(*padding, verbose);
6048  if (verbose) {
6049  std::ostringstream os;
6050  os << *prefix << "Done computing & applying padding" << endl;
6051  std::cerr << os.str ();
6052  }
6053  }
6054 
6055  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6056  // reasonable meaning, whether or not the matrix is fill complete.
6057  // It's just more work to implement.
6058 
6059  // We are not checking the value of the CombineMode input
6060  // argument. For CrsGraph, we only support import/export
6061  // operations if fillComplete has not yet been called. Any
6062  // incoming column-indices are inserted into the target graph. In
6063  // this context, CombineMode values of ADD vs INSERT are
6064  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6065  // duplicate column-index is inserted, it will be compressed out
6066  // when fillComplete is called.
6067  //
6068  // Note: I think REPLACE means that an existing row is replaced by
6069  // the imported row, i.e., the existing indices are cleared. CGB,
6070  // 6/17/2010
6071 
6072  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6073  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6074  std::runtime_error, ": importLIDs.extent(0) = "
6075  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6076  << numPacketsPerLID.extent (0) << ".");
6077  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6078  (isFillComplete (), std::runtime_error,
6079  ": Import or Export operations are not allowed on a target "
6080  "CrsGraph that is fillComplete.");
6081 
// Everything below reads host memory: sync numPacketsPerLID and imports to
// host if their host copies are stale.  importLIDs is const, so it can only
// be asserted to be current on host.
6082  const size_t numImportLIDs(importLIDs.extent(0));
6083  if (numPacketsPerLID.need_sync_host()) {
6084  numPacketsPerLID.sync_host();
6085  }
6086  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6087  if (imports.need_sync_host()) {
6088  imports.sync_host();
6089  }
6090  auto imports_h = imports.view_host();
6091  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6092  auto importLIDs_h = importLIDs.view_host();
6093 
6094  // If we're inserting in local indices, let's pre-allocate
// The scratch array is sized to the largest per-row packet count so it can
// be reused for every row in the insert loop.
6095  Teuchos::Array<LO> lclColInds;
6096  if (isLocallyIndexed()) {
6097  if (verbose) {
6098  std::ostringstream os;
6099  os << *prefix << "Preallocate local indices scratch" << endl;
6100  std::cerr << os.str();
6101  }
6102  size_t maxNumInserts = 0;
6103  for (size_t i = 0; i < numImportLIDs; ++i) {
6104  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6105  }
6106  if (verbose) {
6107  std::ostringstream os;
6108  os << *prefix << "Local indices scratch size: "
6109  << maxNumInserts << endl;
6110  std::cerr << os.str();
6111  }
6112  lclColInds.resize (maxNumInserts);
6113  }
6114  else {
6115  if (verbose) {
6116  std::ostringstream os;
6117  os << *prefix;
6118  if (isGloballyIndexed()) {
6119  os << "Graph is globally indexed";
6120  }
6121  else {
6122  os << "Graph is neither locally nor globally indexed";
6123  }
6124  os << endl;
6125  std::cerr << os.str();
6126  }
6127  }
6128 
6129  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6130  const map_type& rowMap = *rowMap_;
6131 
6132  try {
// importsOffset is the running start position of row i's indices inside
// the packed imports buffer.
6133  size_t importsOffset = 0;
6134  for (size_t i = 0; i < numImportLIDs; ++i) {
6135  if (verbose) {
6136  std::ostringstream os;
6137  os << *prefix << "i=" << i << ", numImportLIDs="
6138  << numImportLIDs << endl;
6139  std::cerr << os.str();
6140  }
6141  // We can only unpack into owned rows, since we only have
6142  // local row indices.
6143  const LO lclRow = importLIDs_h[i];
6144  const GO gblRow = rowMap.getGlobalElement(lclRow);
6145  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6146  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6147  std::logic_error, "importLIDs[i=" << i << "]="
6148  << lclRow << " is not in the row Map on the calling "
6149  "process.");
// Number of indices received for this row, and a pointer to where they
// start in the packed buffer (nullptr when the row received nothing).
6150  const LO numEnt = numPacketsPerLID_h[i];
6151  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6152  imports_h.data() + importsOffset;
// Not locally indexed: hand the received global indices straight to the
// global insert routine.
6153  if (! isLocallyIndexed()) {
6154  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6155  }
6156  else {
6157  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6158  // column Map filtering.
// Locally indexed: translate each global index through the column Map into
// the scratch array, then insert the local indices.
6159  for (LO j = 0; j < numEnt; j++) {
6160  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6161  }
6162  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6163  }
6164  importsOffset += numEnt;
6165  }
6166  }
// Anything thrown by the insert loop (including the std::logic_error test
// above) is rethrown as std::runtime_error with the original message.
6167  catch (std::exception& e) {
6168  TEUCHOS_TEST_FOR_EXCEPTION
6169  (true, std::runtime_error,
6170  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6171  "exception: " << endl << e.what());
6172  }
6173 
6174  if (verbose) {
6175  std::ostringstream os;
6176  os << *prefix << "Done" << endl;
6177  std::cerr << os.str();
6178  }
6179  }
6180 
// Replace this graph's row Map with newMap and rebuild the domain, range,
// and column Maps (plus Import / Export) on newMap's communicator.
//
// newMap: the new row Map.  If it is null, newComm is null as well and the
//   Import and Export are left null.
//
// All new objects are built in locals first and assigned to the members
// only at the very end, so a throw part-way leaves the graph unchanged.
//
// NOTE(review): this listing omits the class-qualifier line between "void"
// and the function name (listing line 6183).
6181  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6182  void
6184  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6185  {
6186  using Teuchos::Comm;
6187  using Teuchos::null;
6188  using Teuchos::ParameterList;
6189  using Teuchos::RCP;
6190 
6191  // We'll set all the state "transactionally," so that this method
6192  // satisfies the strong exception guarantee. This object's state
6193  // won't be modified until the end of this method.
6194  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6195  RCP<import_type> importer;
6196  RCP<export_type> exporter;
6197 
6198  rowMap = newMap;
6199  RCP<const Comm<int> > newComm =
6200  (newMap.is_null ()) ? null : newMap->getComm ();
6201 
6202  if (! domainMap_.is_null ()) {
6203  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6204  // Common case: original domain and row Maps are identical.
6205  // In that case, we need only replace the original domain Map
6206  // with the new Map. This ensures that the new domain and row
6207  // Maps _stay_ identical.
6208  domainMap = newMap;
6209  } else {
6210  domainMap = domainMap_->replaceCommWithSubset (newComm);
6211  }
6212  }
6213  if (! rangeMap_.is_null ()) {
6214  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6215  // Common case: original range and row Maps are identical. In
6216  // that case, we need only replace the original range Map with
6217  // the new Map. This ensures that the new range and row Maps
6218  // _stay_ identical.
6219  rangeMap = newMap;
6220  } else {
6221  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6222  }
6223  }
// Unlike the domain and range Maps, the column Map is never compared with
// the row Map; it is always rebuilt through replaceCommWithSubset.
6224  if (! colMap_.is_null ()) {
6225  colMap = colMap_->replaceCommWithSubset (newComm);
6226  }
6227 
6228  // (Re)create the Export and / or Import if necessary.
6229  if (! newComm.is_null ()) {
6230  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6231  //
6232  // The operations below are collective on the new communicator.
6233  //
6234  // (Re)create the Export object if necessary. If I haven't
6235  // called fillComplete yet, I don't have a rangeMap, so I must
6236  // first check if the _original_ rangeMap is not null. Ditto
6237  // for the Import object and the domain Map.
// An Export is only built when the range Map differs from the row Map,
// first by pointer and then by isSameAs.  The "Export" sublist of the
// graph's parameters is forwarded when it exists.
6238  if (! rangeMap_.is_null () &&
6239  rangeMap != rowMap &&
6240  ! rangeMap->isSameAs (*rowMap)) {
6241  if (params.is_null () || ! params->isSublist ("Export")) {
6242  exporter = rcp (new export_type (rowMap, rangeMap));
6243  }
6244  else {
6245  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6246  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6247  }
6248  }
6249  // (Re)create the Import object if necessary.
// NOTE(review): isSameAs(*colMap) dereferences colMap; this assumes a graph
// with a non-null domain Map also has a column Map -- confirm.
6250  if (! domainMap_.is_null () &&
6251  domainMap != colMap &&
6252  ! domainMap->isSameAs (*colMap)) {
6253  if (params.is_null () || ! params->isSublist ("Import")) {
6254  importer = rcp (new import_type (domainMap, colMap));
6255  } else {
6256  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6257  importer = rcp (new import_type (domainMap, colMap, importSublist));
6258  }
6259  }
6260  } // if newComm is not null
6261 
6262  // Defer side effects until the end. If no destructors throw
6263  // exceptions (they shouldn't anyway), then this method satisfies
6264  // the strong exception guarantee.
6265  exporter_ = exporter;
6266  importer_ = importer;
6267  rowMap_ = rowMap;
6268  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6269  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6270  // the same object. We might want to get rid of this redundant
6271  // pointer sometime, but for now, we'll leave it alone and just
6272  // set map_ to the same object.
6273  this->map_ = rowMap;
6274  domainMap_ = domainMap;
6275  rangeMap_ = rangeMap;
6276  colMap_ = colMap;
6277  }
6278 
6279  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6280  void
6282  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6283  {
6284  using std::endl;
6285  using LO = LocalOrdinal;
6286  using GO = GlobalOrdinal;
6287  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6288  const bool verbose = verbose_;
6289 
6290  std::unique_ptr<std::string> prefix;
6291  if (verbose) {
6292  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6293  std::ostringstream os;
6294  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6295  << endl;
6296  std::cerr << os.str();
6297  }
6298 
6299  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6300  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6301  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6303  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6304  std::invalid_argument, "offsets.extent(0) = " <<
6305  offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6306 
6307  const map_type& rowMap = * (this->getRowMap ());
6308  const map_type& colMap = * (this->getColMap ());
6309 
6310  // We only use these in debug mode, but since debug mode is a
6311  // run-time option, they need to exist here. That's why we create
6312  // the vector with explicit size zero, to avoid overhead if debug
6313  // mode is off.
6314  bool allRowMapDiagEntriesInColMap = true;
6315  bool allDiagEntriesFound = true;
6316  bool allOffsetsCorrect = true;
6317  bool noOtherWeirdness = true;
6318  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6319  wrong_offsets_type wrongOffsets(0);
6320 
6321  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6322  // the subset of Map functionality that we need below.
6323  auto lclRowMap = rowMap.getLocalMap ();
6324  auto lclColMap = colMap.getLocalMap ();
6325 
6326  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6327  // setup, at least on the host. For CUDA, we have to use LocalMap
6328  // (that comes from each of the two Maps).
6329 
6330  const bool sorted = this->isSorted ();
6331  if (isFillComplete ()) {
6332  auto lclGraph = this->getLocalGraphDevice ();
6333  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6334  lclGraph.row_map,
6335  lclGraph.entries, sorted);
6336  }
6337  else {
6338  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6339  // since the graph is not fill complete. The previous version
6340  // of this code assumed UVM; this version does not.
6341  auto offsets_h = Kokkos::create_mirror_view (offsets);
6342 
6343  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6344  // Find the diagonal entry. Since the row Map and column Map
6345  // may differ, we have to compare global row and column
6346  // indices, not local.
6347  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6348  const GO gblColInd = gblRowInd;
6349  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6350 
6351  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6352  allRowMapDiagEntriesInColMap = false;
6353  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6354  }
6355  else {
6356  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6357  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6358  rowInfo.numEntries > 0) {
6359 
6360  auto colInds = this->getLocalIndsViewHost (rowInfo);
6361  const size_t hint = 0; // not needed for this algorithm
6362  const size_t offset =
6363  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6364  lclColInd, hint, sorted);
6365  offsets_h(lclRowInd) = offset;
6366 
6367  if (debug_) {
6368  // Now that we have what we think is an offset, make sure
6369  // that it really does point to the diagonal entry. Offsets
6370  // are _relative_ to each row, not absolute (for the whole
6371  // (local) graph).
6372  typename local_inds_dualv_type::t_host::const_type lclColInds;
6373  try {
6374  lclColInds = this->getLocalIndsViewHost (rowInfo);
6375  }
6376  catch (...) {
6377  noOtherWeirdness = false;
6378  }
6379  // Don't continue with error checking if the above failed.
6380  if (noOtherWeirdness) {
6381  const size_t numEnt = lclColInds.extent (0);
6382  if (offset >= numEnt) {
6383  // Offsets are relative to each row, so this means that
6384  // the offset is out of bounds.
6385  allOffsetsCorrect = false;
6386  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6387  } else {
6388  const LO actualLclColInd = lclColInds(offset);
6389  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6390  if (actualGblColInd != gblColInd) {
6391  allOffsetsCorrect = false;
6392  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6393  }
6394  }
6395  }
6396  } // debug_
6397  }
6398  else { // either row is empty, or something went wrong w/ getRowInfo()
6399  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6400  allDiagEntriesFound = false;
6401  }
6402  } // whether lclColInd is a valid local column index
6403  } // for each local row
6404  // DEEP_COPY REVIEW - NOT TESTED
6405  Kokkos::deep_copy (offsets, offsets_h);
6406  } // whether the graph is fill complete
6407 
6408  if (verbose && wrongOffsets.size () != 0) {
6409  std::ostringstream os;
6410  os << *prefix << "Wrong offsets: [";
6411  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6412  os << "(" << wrongOffsets[k].first << ","
6413  << wrongOffsets[k].second << ")";
6414  if (k + 1 < wrongOffsets.size ()) {
6415  os << ", ";
6416  }
6417  }
6418  os << "]" << endl;
6419  std::cerr << os.str();
6420  }
6421 
6422  if (debug_) {
6423  using Teuchos::reduceAll;
6424  using std::endl;
6425  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6426  const bool localSuccess =
6427  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6428  const int numResults = 5;
6429  int lclResults[5];
6430  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6431  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6432  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6433  lclResults[3] = noOtherWeirdness ? 1 : 0;
6434  // min-all-reduce will compute least rank of all the processes
6435  // that didn't succeed.
6436  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6437 
6438  int gblResults[5];
6439  gblResults[0] = 0;
6440  gblResults[1] = 0;
6441  gblResults[2] = 0;
6442  gblResults[3] = 0;
6443  gblResults[4] = 0;
6444  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6445  numResults, lclResults, gblResults);
6446 
6447  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6448  || gblResults[3] != 1) {
6449  std::ostringstream os; // build error message
6450  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6451  "possibly among others): " << endl;
6452  if (gblResults[0] == 0) {
6453  os << " - The column Map does not contain at least one diagonal entry "
6454  "of the graph." << endl;
6455  }
6456  if (gblResults[1] == 0) {
6457  os << " - On one or more processes, some row does not contain a "
6458  "diagonal entry." << endl;
6459  }
6460  if (gblResults[2] == 0) {
6461  os << " - On one or more processes, some offsets are incorrect."
6462  << endl;
6463  }
6464  if (gblResults[3] == 0) {
6465  os << " - One or more processes had some other error."
6466  << endl;
6467  }
6468  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6469  }
6470  } // debug_
6471  }
6472 
// Return, through the offsets output argument, the graph's off-rank
// offsets View (getLocalNumRows()+1 entries), computing and caching it on
// first use.
//
// NOTE(review): what each offset means is defined by
// Details::getGraphOffRankOffsets, which is not visible here; presumably it
// marks, per row, where entries outside the domain Map begin -- confirm.
//
// Requires a column Map (std::runtime_error otherwise), asserts that the
// graph is sorted, and raises std::logic_error if the graph is not fill
// complete.
//
// NOTE(review): this const method assigns k_offRankOffsets_ and
// haveLocalOffRankOffsets_, so those members are presumably mutable.  The
// listing also omits the class-qualifier line (listing line 6475).
6473  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6474  void
6476  getLocalOffRankOffsets (offset_device_view_type& offsets) const
6477  {
6478  using std::endl;
6479  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6480  const bool verbose = verbose_;
6481 
6482  std::unique_ptr<std::string> prefix;
6483  if (verbose) {
6484  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6485  std::ostringstream os;
6486  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6487  << endl;
6488  std::cerr << os.str();
6489  }
6490 
6491  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6492  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6493  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6494 
6495  const size_t lclNumRows = this->getLocalNumRows ();
6496 
// Fast path: reuse the cached View when it exists and still has the right
// length.  Otherwise mark the cache as stale before recomputing.
6497  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6498  offsets = k_offRankOffsets_;
6499  return;
6500  }
6501  haveLocalOffRankOffsets_ = false;
6502 
6503  const map_type& colMap = * (this->getColMap ());
6504  const map_type& domMap = * (this->getDomainMap ());
6505 
6506  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6507  // the subset of Map functionality that we need below.
6508  auto lclColMap = colMap.getLocalMap ();
6509  auto lclDomMap = domMap.getLocalMap ();
6510 
6511  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6512  // setup, at least on the host. For CUDA, we have to use LocalMap
6513  // (that comes from each of the two Maps).
6514 
6515  TEUCHOS_ASSERT(this->isSorted ());
6516  if (isFillComplete ()) {
// Allocate the cache without initializing it; getGraphOffRankOffsets is
// expected to write every entry.
6517  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6518  auto lclGraph = this->getLocalGraphDevice ();
6519  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6520  lclColMap, lclDomMap,
6521  lclGraph);
6522  offsets = k_offRankOffsets_;
6523  haveLocalOffRankOffsets_ = true;
6524  } else {
6525  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6526  (true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6527  }
6528  }
6529 
// File-local helpers that let the Teuchos::ArrayRCP overload of
// getLocalDiagOffsets hand a suitable View to the device-View overload.
6530  namespace { // (anonymous)
6531 
6532  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6533  // below). The point is to avoid the deep copy between the input
6534  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6535  // can't use UVM to avoid the deep copy with CUDA, because the
6536  // ArrayRCP is a host pointer, while the input to the graph's
6537  // getLocalDiagOffsets method is a device pointer. Assigning a
6538  // host pointer to a device pointer is incorrect unless the host
6539  // pointer points to host pinned memory. The goal is to get rid
6540  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6541  // copy for backwards compatibility.
6542  //
6543  // We have to use template magic because
6544  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6545  // if device_type::memory_space is not Kokkos::HostSpace (as is
6546  // the case with CUDA).
6547 
// Primary template.  The second parameter defaults to whether DeviceType's
// memory space is Kokkos::HostSpace and selects one of the two
// specializations below; the primary itself is intentionally empty.
6548  template<class DeviceType,
6549  const bool memSpaceIsHostSpace =
6550  std::is_same<typename DeviceType::memory_space,
6551  Kokkos::HostSpace>::value>
6552  struct HelpGetLocalDiagOffsets {};
6553 
// Host-space case: the "device" View is the same unmanaged host View type,
// so the host array is used directly and nothing is copied.
6554  template<class DeviceType>
6555  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6556  typedef DeviceType device_type;
6557  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6558  Kokkos::MemoryUnmanaged> device_offsets_type;
6559  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6560  Kokkos::MemoryUnmanaged> host_offsets_type;
6561 
// Returns the host View itself as the View to pass to the graph.
6562  static device_offsets_type
6563  getDeviceOffsets (const host_offsets_type& hostOffsets)
6564  {
6565  // Host and device are the same; no need to allocate a
6566  // temporary device View.
6567  return hostOffsets;
6568  }
6569 
// Deliberate no-op: the graph already wrote into the host array.
6570  static void
6571  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6572  const device_offsets_type& /* deviceOffsets */)
6573  { /* copy back not needed; host and device are the same */ }
6574  };
6575 
// Non-host case: a temporary managed device View is allocated, filled by
// the graph, and then deep-copied back into the host array.
6576  template<class DeviceType>
6577  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6578  typedef DeviceType device_type;
6579  // We have to do a deep copy, since host memory space != device
6580  // memory space. Thus, the device View is managed (we need to
6581  // allocate a temporary device View).
6582  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6583  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6584  Kokkos::MemoryUnmanaged> host_offsets_type;
6585 
// Allocates a new device View with the same length as hostOffsets; the
// contents of hostOffsets are not copied in.
6586  static device_offsets_type
6587  getDeviceOffsets (const host_offsets_type& hostOffsets)
6588  {
6589  // Host memory space != device memory space, so we must
6590  // allocate a temporary device View for the graph.
6591  return device_offsets_type ("offsets", hostOffsets.extent (0));
6592  }
6593 
// Copies the device results into the caller's host array.
6594  static void
6595  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6596  const device_offsets_type& deviceOffsets)
6597  {
6598  // DEEP_COPY REVIEW - NOT TESTED
6599  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6600  }
6601  };
6602  } // namespace (anonymous)
6603 
6604 
// Host-array overload: fill a Teuchos::ArrayRCP with the per-row diagonal
// offsets by forwarding to the overload that takes a device View.
//
// offsets: output array.  It is resized to getLocalNumRows() whenever its
//   size differs, even if a later step then throws.
//
// Raises std::runtime_error if the graph has no column Map; anything the
// device-View overload raises propagates unchanged.
//
// NOTE(review): this listing omits the class-qualifier line between "void"
// and the function name (listing line 6607).
6605  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6606  void
6608  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6609  {
6610  typedef LocalOrdinal LO;
6611  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6613  (! this->hasColMap (), std::runtime_error,
6614  "The graph does not yet have a column Map.");
6615  const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6616  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6617  // NOTE (mfh 21 Jan 2016) This means that the method does not
6618  // satisfy the strong exception guarantee (no side effects
6619  // unless successful).
6620  offsets.resize (myNumRows);
6621  }
6622 
6623  // mfh 21 Jan 2016: This method unfortunately takes a
6624  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6625  // device pointer. We can't access host memory from the device;
6626  // that's the wrong direction for UVM. (It's the right direction
6627  // for inefficient host pinned memory, but we don't want to use
6628  // that here.) Thus, if device memory space != host memory space,
6629  // we allocate and use a temporary device View to get the offsets.
6630  // If the two spaces are equal, the template magic makes the deep
6631  // copy go away.
6632  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6633  typedef typename helper_type::host_offsets_type host_offsets_type;
6634  // Unmanaged host View that views the output array.
6635  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6636  // Allocate temp device View if host != device, else reuse host array.
6637  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6638  // NOT recursion; this calls the overload that takes a device View.
6639  this->getLocalDiagOffsets (deviceOffsets);
// Deep-copies the results into the output array when a temporary device
// View was used; a no-op when host and device memory are the same.
6640  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6641  }
6642 
// Always returns true.
// NOTE(review): this listing omits the declarator and the opening brace
// (listing lines 6645-6646).  Presumably this is
// CrsGraph<...>::supportsRowViews, which the pack routine earlier in this
// file checks via this->supportsRowViews() -- confirm against the real
// source.
6643  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6644  bool
6647  return true;
6648  }
6649 
6650  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6651  void
6654  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6655  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6656  const Teuchos::RCP<const map_type>& domainMap,
6657  const Teuchos::RCP<const map_type>& rangeMap,
6658  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6659  {
6664  using Teuchos::ArrayRCP;
6665  using Teuchos::ArrayView;
6666  using Teuchos::Comm;
6667  using Teuchos::ParameterList;
6668  using Teuchos::rcp;
6669  using Teuchos::RCP;
6670 #ifdef HAVE_TPETRA_MMM_TIMINGS
6671  using std::string;
6672  using Teuchos::TimeMonitor;
6673 #endif
6674 
6675  using LO = LocalOrdinal;
6676  using GO = GlobalOrdinal;
6677  using NT = node_type;
6678  using this_CRS_type = CrsGraph<LO, GO, NT>;
6679  using ivector_type = Vector<int, LO, GO, NT>;
6680 
6681  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6682 
6683 #ifdef HAVE_TPETRA_MMM_TIMINGS
6684  string label;
6685  if(!params.is_null()) label = params->get("Timer Label", label);
6686  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6687  RCP<TimeMonitor> MM =
6688  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6689 #endif
6690 
6691  // Make sure that the input argument rowTransfer is either an
6692  // Import or an Export. Import and Export are the only two
6693  // subclasses of Transfer that we defined, but users might
6694  // (unwisely, for now at least) decide to implement their own
6695  // subclasses. Exclude this possibility.
6696  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6697  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6698  TEUCHOS_TEST_FOR_EXCEPTION(
6699  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6700  prefix << "The 'rowTransfer' input argument must be either an Import or "
6701  "an Export, and its template parameters must match the corresponding "
6702  "template parameters of the CrsGraph.");
6703 
6704  // Make sure that the input argument domainTransfer is either an
6705  // Import or an Export. Import and Export are the only two
6706  // subclasses of Transfer that we defined, but users might
6707  // (unwisely, for now at least) decide to implement their own
6708  // subclasses. Exclude this possibility.
6709  Teuchos::RCP<const import_type> xferDomainAsImport =
6710  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6711  Teuchos::RCP<const export_type> xferDomainAsExport =
6712  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6713 
6714  if(! domainTransfer.is_null()) {
6715 
6716  TEUCHOS_TEST_FOR_EXCEPTION(
6717  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6718  prefix << "The 'domainTransfer' input argument must be either an "
6719  "Import or an Export, and its template parameters must match the "
6720  "corresponding template parameters of the CrsGraph.");
6721 
6722  TEUCHOS_TEST_FOR_EXCEPTION(
6723  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6724  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6725  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6726  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6727  "must be of the same type (either Import or Export).");
6728 
6729  TEUCHOS_TEST_FOR_EXCEPTION(
6730  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6731  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6732  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6733  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6734  "must be of the same type (either Import or Export).");
6735 
6736  } // domainTransfer != null
6737 
6738 
6739  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6740  // if the source Map is not distributed but the target Map is?
6741  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6742 
6743  //
6744  // Get the caller's parameters
6745  //
6746 
6747  bool reverseMode = false; // Are we in reverse mode?
6748  bool restrictComm = false; // Do we need to restrict the communicator?
6749  RCP<ParameterList> graphparams; // parameters for the destination graph
6750  if (! params.is_null()) {
6751  reverseMode = params->get("Reverse Mode", reverseMode);
6752  restrictComm = params->get("Restrict Communicator", restrictComm);
6753  graphparams = sublist(params, "CrsGraph");
6754  }
6755 
6756  // Get the new domain and range Maps. We need some of them for error
6757  // checking, now that we have the reverseMode parameter.
6758  RCP<const map_type> MyRowMap = reverseMode ?
6759  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6760  RCP<const map_type> MyColMap; // create this below
6761  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6762  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6763  RCP<const map_type> BaseRowMap = MyRowMap;
6764  RCP<const map_type> BaseDomainMap = MyDomainMap;
6765 
6766  // If the user gave us a nonnull destGraph, then check whether it's
6767  // "pristine." That means that it has no entries.
6768  //
6769  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6770  // then this exception test may hang. It would be better to
6771  // forward an error flag to the next communication phase.
6772  if (! destGraph.is_null()) {
6773  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6774  // whether a graph or matrix has no entries on the calling
6775  // process, is that it is neither locally nor globally indexed.
6776  // This may change eventually with the Kokkos refactor version
6777  // of Tpetra, so it would be better just to check the quantity
6778  // of interest directly. Note that with the Kokkos refactor
6779  // version of Tpetra, asking for the total number of entries in
6780  // a graph or matrix that is not fill complete might require
6781  // computation (kernel launch), since it is not thread scalable
6782  // to update a count every time an entry is inserted.
6783  const bool NewFlag =
6784  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6785  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6786  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6787  "if its graph is empty (neither locally nor globally indexed).");
6788 
6789  // FIXME (mfh 15 May 2014) At some point, we want to change
6790  // graphs and matrices so that their DistObject Map
6791  // (this->getMap()) may differ from their row Map. This will
6792  // make redistribution for 2-D distributions more efficient. I
6793  // hesitate to change this check, because I'm not sure how much
6794  // the code here depends on getMap() and getRowMap() being the
6795  // same.
6796  TEUCHOS_TEST_FOR_EXCEPTION(
6797  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6798  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6799  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6800 
6801  TEUCHOS_TEST_FOR_EXCEPTION(
6802  ! destGraph->checkSizes(*this), std::invalid_argument,
6803  prefix << "You provided a nonnull destination graph, but checkSizes() "
6804  "indicates that it is not a legal legal target for redistribution from "
6805  "the source graph (*this). This may mean that they do not have the "
6806  "same dimensions.");
6807  }
6808 
6809  // If forward mode (the default), then *this's (row) Map must be
6810  // the same as the source Map of the Transfer. If reverse mode,
6811  // then *this's (row) Map must be the same as the target Map of
6812  // the Transfer.
6813  //
6814  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6815  // and matrices so that their DistObject Map (this->getMap()) may
6816  // differ from their row Map. This will make redistribution for
6817  // 2-D distributions more efficient. I hesitate to change this
6818  // check, because I'm not sure how much the code here depends on
6819  // getMap() and getRowMap() being the same.
6820  TEUCHOS_TEST_FOR_EXCEPTION(
6821  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6822  std::invalid_argument, prefix <<
6823  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6824 
6825  TEUCHOS_TEST_FOR_EXCEPTION(
6826  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6827  std::invalid_argument, prefix <<
6828  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6829 
6830  // checks for domainTransfer
6831  TEUCHOS_TEST_FOR_EXCEPTION(
6832  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6833  std::invalid_argument,
6834  prefix << "The target map of the 'domainTransfer' input argument must be "
6835  "the same as the rebalanced domain map 'domainMap'");
6836 
6837  TEUCHOS_TEST_FOR_EXCEPTION(
6838  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6839  std::invalid_argument,
6840  prefix << "The source map of the 'domainTransfer' input argument must be "
6841  "the same as the rebalanced domain map 'domainMap'");
6842 
6843  // The basic algorithm here is:
6844  //
6845  // 1. Call the moral equivalent of "Distor.do" to handle the import.
6846  // 2. Copy all the Imported and Copy/Permuted data into the raw
6847  // CrsGraph pointers, still using GIDs.
6848  // 3. Call an optimized version of MakeColMap that avoids the
6849  // Directory lookups (since the importer knows who owns all the
6850  // GIDs) AND reindexes to LIDs.
6851  // 4. Call expertStaticFillComplete()
6852 
6853  // Get information from the Importer
6854  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6855  ArrayView<const LO> ExportLIDs = reverseMode ?
6856  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6857  ArrayView<const LO> RemoteLIDs = reverseMode ?
6858  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6859  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6860  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6861  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6862  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6863  Distributor& Distor = rowTransfer.getDistributor();
6864 
6865  // Owning PIDs
6866  Teuchos::Array<int> SourcePids;
6867  Teuchos::Array<int> TargetPids;
6868  int MyPID = getComm()->getRank();
6869 
6870  // Temp variables for sub-communicators
6871  RCP<const map_type> ReducedRowMap, ReducedColMap,
6872  ReducedDomainMap, ReducedRangeMap;
6873  RCP<const Comm<int> > ReducedComm;
6874 
6875  // If the user gave us a null destGraph, then construct the new
6876  // destination graph. We will replace its column Map later.
6877  if (destGraph.is_null()) {
6878  destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6879  }
6880 
6881  /***************************************************/
6882  /***** 1) First communicator restriction phase ****/
6883  /***************************************************/
6884  if (restrictComm) {
6885  ReducedRowMap = MyRowMap->removeEmptyProcesses();
6886  ReducedComm = ReducedRowMap.is_null() ?
6887  Teuchos::null :
6888  ReducedRowMap->getComm();
6889  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6890 
6891  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6892  ReducedRowMap :
6893  MyDomainMap->replaceCommWithSubset(ReducedComm);
6894  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6895  ReducedRowMap :
6896  MyRangeMap->replaceCommWithSubset(ReducedComm);
6897 
6898  // Reset the "my" maps
6899  MyRowMap = ReducedRowMap;
6900  MyDomainMap = ReducedDomainMap;
6901  MyRangeMap = ReducedRangeMap;
6902 
6903  // Update my PID, if we've restricted the communicator
6904  if (! ReducedComm.is_null()) {
6905  MyPID = ReducedComm->getRank();
6906  }
6907  else {
6908  MyPID = -2; // For debugging
6909  }
6910  }
6911  else {
6912  ReducedComm = MyRowMap->getComm();
6913  }
6914 
6915  /***************************************************/
6916  /***** 2) From Tpera::DistObject::doTransfer() ****/
6917  /***************************************************/
6918 #ifdef HAVE_TPETRA_MMM_TIMINGS
6919  MM = Teuchos::null;
6920  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6921 #endif
6922  // Get the owning PIDs
6923  RCP<const import_type> MyImporter = getImporter();
6924 
6925  // check whether domain maps of source graph and base domain map is the same
6926  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6927 
6928  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6929  // Same domain map as source graph
6930  //
6931  // NOTE: This won't work for restrictComm (because the Import
6932  // doesn't know the restricted PIDs), though writing an
6933  // optimized version for that case would be easy (Import an
6934  // IntVector of the new PIDs). Might want to add this later.
6935  Import_Util::getPids(*MyImporter, SourcePids, false);
6936  }
6937  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6938  // Same domain map as source graph (restricted communicator)
6939  // We need one import from the domain to the column map
6940  ivector_type SourceDomain_pids(getDomainMap(),true);
6941  ivector_type SourceCol_pids(getColMap());
6942  // SourceDomain_pids contains the restricted pids
6943  SourceDomain_pids.putScalar(MyPID);
6944 
6945  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6946  SourcePids.resize(getColMap()->getLocalNumElements());
6947  SourceCol_pids.get1dCopy(SourcePids());
6948  }
6949  else if (MyImporter.is_null() && bSameDomainMap) {
6950  // Graph has no off-process entries
6951  SourcePids.resize(getColMap()->getLocalNumElements());
6952  SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6953  }
6954  else if ( ! MyImporter.is_null() &&
6955  ! domainTransfer.is_null() ) {
6956  // general implementation for rectangular matrices with
6957  // domain map different than SourceGraph domain map.
6958  // User has to provide a DomainTransfer object. We need
6959  // to communications (import/export)
6960 
6961  // TargetDomain_pids lives on the rebalanced new domain map
6962  ivector_type TargetDomain_pids(domainMap);
6963  TargetDomain_pids.putScalar(MyPID);
6964 
6965  // SourceDomain_pids lives on the non-rebalanced old domain map
6966  ivector_type SourceDomain_pids(getDomainMap());
6967 
6968  // SourceCol_pids lives on the non-rebalanced old column map
6969  ivector_type SourceCol_pids(getColMap());
6970 
6971  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6972  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6973  }
6974  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6975  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6976  }
6977  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6978  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6979  }
6980  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6981  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6982  }
6983  else {
6984  TEUCHOS_TEST_FOR_EXCEPTION(
6985  true, std::logic_error,
6986  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6987  }
6988  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6989  SourcePids.resize(getColMap()->getLocalNumElements());
6990  SourceCol_pids.get1dCopy(SourcePids());
6991  }
6992  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6993  getDomainMap()->isSameAs(*getRowMap())) {
6994  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6995  ivector_type TargetRow_pids(domainMap);
6996  ivector_type SourceRow_pids(getRowMap());
6997  ivector_type SourceCol_pids(getColMap());
6998 
6999  TargetRow_pids.putScalar(MyPID);
7000  if (! reverseMode && xferAsImport != nullptr) {
7001  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7002  }
7003  else if (reverseMode && xferAsExport != nullptr) {
7004  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7005  }
7006  else if (! reverseMode && xferAsExport != nullptr) {
7007  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7008  }
7009  else if (reverseMode && xferAsImport != nullptr) {
7010  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7011  }
7012  else {
7013  TEUCHOS_TEST_FOR_EXCEPTION(
7014  true, std::logic_error,
7015  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7016  }
7017  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7018  SourcePids.resize(getColMap()->getLocalNumElements());
7019  SourceCol_pids.get1dCopy(SourcePids());
7020  }
7021  else {
7022  TEUCHOS_TEST_FOR_EXCEPTION(
7023  true, std::invalid_argument,
7024  prefix << "This method only allows either domainMap == getDomainMap(), "
7025  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7026  }
7027 
7028  // Tpetra-specific stuff
7029  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7030  if (constantNumPackets == 0) {
7031  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7032  RemoteLIDs.size());
7033  }
7034  else {
7035  // There are a constant number of packets per element. We
7036  // already know (from the number of "remote" (incoming)
7037  // elements) how many incoming elements we expect, so we can
7038  // resize the buffer accordingly.
7039  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7040  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7041  }
7042 
7043  {
7044  // packAndPrepare* methods modify numExportPacketsPerLID_.
7045  destGraph->numExportPacketsPerLID_.modify_host();
7046  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7047  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7048 
7049  // Pack & Prepare w/ owning PIDs
7050  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7051  numExportPacketsPerLID, ExportLIDs,
7052  SourcePids, constantNumPackets);
7053  }
7054 
7055  // Do the exchange of remote data.
7056 #ifdef HAVE_TPETRA_MMM_TIMINGS
7057  MM = Teuchos::null;
7058  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7059 #endif
7060 
7061  if (communication_needed) {
7062  if (reverseMode) {
7063  if (constantNumPackets == 0) { // variable number of packets per LID
7064  // Make sure that host has the latest version, since we're
7065  // using the version on host. If host has the latest
7066  // version, syncing to host does nothing.
7067  destGraph->numExportPacketsPerLID_.sync_host();
7068  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7069  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7070  destGraph->numImportPacketsPerLID_.sync_host();
7071  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7072  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7073 
7074  Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7075  destGraph->numImportPacketsPerLID_.view_host());
7076  size_t totalImportPackets = 0;
7077  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7078  totalImportPackets += numImportPacketsPerLID[i];
7079  }
7080 
7081  // Reallocation MUST go before setting the modified flag,
7082  // because it may clear out the flags.
7083  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7084  destGraph->imports_.modify_host();
7085  auto hostImports = destGraph->imports_.view_host();
7086  // This is a legacy host pack/unpack path, so use the host
7087  // version of exports_.
7088  destGraph->exports_.sync_host();
7089  auto hostExports = destGraph->exports_.view_host();
7090  Distor.doReversePostsAndWaits(hostExports,
7091  numExportPacketsPerLID,
7092  hostImports,
7093  numImportPacketsPerLID);
7094  }
7095  else { // constant number of packets per LI
7096  destGraph->imports_.modify_host();
7097  auto hostImports = destGraph->imports_.view_host();
7098  // This is a legacy host pack/unpack path, so use the host
7099  // version of exports_.
7100  destGraph->exports_.sync_host();
7101  auto hostExports = destGraph->exports_.view_host();
7102  Distor.doReversePostsAndWaits(hostExports,
7103  constantNumPackets,
7104  hostImports);
7105  }
7106  }
7107  else { // forward mode (the default)
7108  if (constantNumPackets == 0) { // variable number of packets per LID
7109  // Make sure that host has the latest version, since we're
7110  // using the version on host. If host has the latest
7111  // version, syncing to host does nothing.
7112  destGraph->numExportPacketsPerLID_.sync_host();
7113  destGraph->numImportPacketsPerLID_.sync_host();
7114  Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7115  destGraph->numImportPacketsPerLID_.view_host());
7116 
7117  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7118  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7119  size_t totalImportPackets = 0;
7120  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7121  totalImportPackets += numImportPacketsPerLID[i];
7122  }
7123 
7124  // Reallocation MUST go before setting the modified flag,
7125  // because it may clear out the flags.
7126  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7127  destGraph->imports_.modify_host();
7128  auto hostImports = destGraph->imports_.view_host();
7129  // This is a legacy host pack/unpack path, so use the host
7130  // version of exports_.
7131  destGraph->exports_.sync_host();
7132  auto hostExports = destGraph->exports_.view_host();
7133  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7134  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7135  Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7136  }
7137  else { // constant number of packets per LID
7138  destGraph->imports_.modify_host();
7139  auto hostImports = destGraph->imports_.view_host();
7140  // This is a legacy host pack/unpack path, so use the host
7141  // version of exports_.
7142  destGraph->exports_.sync_host();
7143  auto hostExports = destGraph->exports_.view_host();
7144  Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7145  }
7146  }
7147  }
7148 
7149  /*********************************************************************/
7150  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7151  /*********************************************************************/
7152 
7153 #ifdef HAVE_TPETRA_MMM_TIMINGS
7154  MM = Teuchos::null;
7155  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7156 #endif
7157 
7158  // Backwards compatibility measure. We'll use this again below.
7159  destGraph->numImportPacketsPerLID_.sync_host();
7160  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7161  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7162  destGraph->imports_.sync_host();
7163  Teuchos::ArrayView<const packet_type> hostImports =
7164  getArrayViewFromDualView(destGraph->imports_);
7165  size_t mynnz =
7166  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7167  numImportPacketsPerLID,
7168  constantNumPackets, INSERT,
7169  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7170  size_t N = BaseRowMap->getLocalNumElements();
7171 
7172  // Allocations
7173  ArrayRCP<size_t> CSR_rowptr(N+1);
7174  ArrayRCP<GO> CSR_colind_GID;
7175  ArrayRCP<LO> CSR_colind_LID;
7176  CSR_colind_GID.resize(mynnz);
7177 
7178  // If LO and GO are the same, we can reuse memory when
7179  // converting the column indices from global to local indices.
7180  if (typeid(LO) == typeid(GO)) {
7181  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7182  }
7183  else {
7184  CSR_colind_LID.resize(mynnz);
7185  }
7186 
7187  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7188  // unpackAndCombine method on a "CrsArrays" object? This passing
7189  // in a huge list of arrays is icky. Can't we have a bit of an
7190  // abstraction? Implementing a concrete DistObject subclass only
7191  // takes five methods.
7192  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7193  numImportPacketsPerLID, constantNumPackets,
7194  INSERT, NumSameIDs, PermuteToLIDs,
7195  PermuteFromLIDs, N, mynnz, MyPID,
7196  CSR_rowptr(), CSR_colind_GID(),
7197  SourcePids(), TargetPids);
7198 
7199  /**************************************************************/
7200  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7201  /**************************************************************/
7202 #ifdef HAVE_TPETRA_MMM_TIMINGS
7203  MM = Teuchos::null;
7204  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7205 #endif
7206  // Call an optimized version of makeColMap that avoids the
7207  // Directory lookups (since the Import object knows who owns all
7208  // the GIDs).
7209  Teuchos::Array<int> RemotePids;
7210  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7211  CSR_colind_LID(),
7212  CSR_colind_GID(),
7213  BaseDomainMap,
7214  TargetPids, RemotePids,
7215  MyColMap);
7216 
7217  /*******************************************************/
7218  /**** 4) Second communicator restriction phase ****/
7219  /*******************************************************/
7220  if (restrictComm) {
7221  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7222  ReducedRowMap :
7223  MyColMap->replaceCommWithSubset(ReducedComm);
7224  MyColMap = ReducedColMap; // Reset the "my" maps
7225  }
7226 
7227  // Replace the col map
7228  destGraph->replaceColMap(MyColMap);
7229 
7230  // Short circuit if the processor is no longer in the communicator
7231  //
7232  // NOTE: Epetra replaces modifies all "removed" processes so they
7233  // have a dummy (serial) Map that doesn't touch the original
7234  // communicator. Duplicating that here might be a good idea.
7235  if (ReducedComm.is_null()) {
7236  return;
7237  }
7238 
7239  /***************************************************/
7240  /**** 5) Sort ****/
7241  /***************************************************/
7242  if ((! reverseMode && xferAsImport != nullptr) ||
7243  (reverseMode && xferAsExport != nullptr)) {
7244  Import_Util::sortCrsEntries(CSR_rowptr(),
7245  CSR_colind_LID());
7246  }
7247  else if ((! reverseMode && xferAsExport != nullptr) ||
7248  (reverseMode && xferAsImport != nullptr)) {
7249  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7250  CSR_colind_LID());
7251  if (CSR_rowptr[N] != mynnz) {
7252  CSR_colind_LID.resize(CSR_rowptr[N]);
7253  }
7254  }
7255  else {
7256  TEUCHOS_TEST_FOR_EXCEPTION(
7257  true, std::logic_error,
7258  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7259  }
7260  /***************************************************/
7261  /**** 6) Reset the colmap and the arrays ****/
7262  /***************************************************/
7263 
7264  // Call constructor for the new graph (restricted as needed)
7265  //
7266  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7267 
7268  /***************************************************/
7269  /**** 7) Build Importer & Call ESFC ****/
7270  /***************************************************/
7271  // Pre-build the importer using the existing PIDs
7272  Teuchos::ParameterList esfc_params;
7273 #ifdef HAVE_TPETRA_MMM_TIMINGS
7274  MM = Teuchos::null;
7275  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7276 #endif
7277  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7278 #ifdef HAVE_TPETRA_MMM_TIMINGS
7279  MM = Teuchos::null;
7280  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7281 
7282  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7283 #endif
7284  if(!params.is_null())
7285  esfc_params.set("compute global constants",params->get("compute global constants",true));
7286 
7287  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7288  MyImport, Teuchos::null, rcp(&esfc_params,false));
7289 
7290  }
7291 
// Single-Import convenience overload: hands the whole job to
// transferAndFillComplete(), passing 'importer' as the second argument.
//
// NOTE(review): the declarator lines between "void" and the first visible
// parameter (class qualifier, method name, and the 'destGraph' parameter
// that the body refers to) are absent from this listing. Presumably this
// is CrsGraph::importAndFillComplete -- confirm against the class
// declaration.
//
// importer   Import object forwarded as the transfer to use.
// domainMap  Forwarded unchanged.
// rangeMap   Forwarded unchanged.
// params     Forwarded unchanged.
//
// The method is const-qualified; it has no return value.
7292  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7293  void
7296  const import_type& importer,
7297  const Teuchos::RCP<const map_type>& domainMap,
7298  const Teuchos::RCP<const map_type>& rangeMap,
7299  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7300  {
// Teuchos::null fills the third argument slot, so no separate transfer
// object for the domain is supplied (presumably the 'domainTransfer'
// parameter -- confirm against the callee's signature).
7301  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7302  }
7303 
// Two-Import convenience overload: hands the whole job to
// transferAndFillComplete(), passing 'rowImporter' as the second argument
// and 'domainImporter' (wrapped in an RCP) as the third.
//
// NOTE(review): the declarator lines between "void" and the first visible
// parameter (class qualifier, method name, and the 'destGraph' parameter
// that the body refers to) are absent from this listing. Presumably this
// is CrsGraph::importAndFillComplete -- confirm against the class
// declaration.
//
// rowImporter     Import object forwarded as the second argument.
// domainImporter  Import object forwarded, via an RCP, as the third argument.
// domainMap       Forwarded unchanged.
// rangeMap        Forwarded unchanged.
// params          Forwarded unchanged.
//
// The method is const-qualified; it has no return value.
7304  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7305  void
7308  const import_type& rowImporter,
7309  const import_type& domainImporter,
7310  const Teuchos::RCP<const map_type>& domainMap,
7311  const Teuchos::RCP<const map_type>& rangeMap,
7312  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7313  {
// rcpFromRef turns the caller's reference into an RCP for the callee.
// NOTE(review): per the Teuchos documentation this RCP should be
// non-owning, so 'domainImporter' must outlive the call -- confirm.
7314  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7315  }
7316 
// Single-Export convenience overload: hands the whole job to
// transferAndFillComplete(), passing 'exporter' as the second argument.
//
// NOTE(review): the declarator lines between "void" and the first visible
// parameter (class qualifier, method name, and the 'destGraph' parameter
// that the body refers to) are absent from this listing. Presumably this
// is CrsGraph::exportAndFillComplete -- confirm against the class
// declaration.
//
// exporter   Export object forwarded as the transfer to use.
// domainMap  Forwarded unchanged.
// rangeMap   Forwarded unchanged.
// params     Forwarded unchanged.
//
// The method is const-qualified; it has no return value.
7317  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7318  void
7321  const export_type& exporter,
7322  const Teuchos::RCP<const map_type>& domainMap,
7323  const Teuchos::RCP<const map_type>& rangeMap,
7324  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7325  {
// Teuchos::null fills the third argument slot, so no separate transfer
// object for the domain is supplied (presumably the 'domainTransfer'
// parameter -- confirm against the callee's signature).
7326  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7327  }
7328 
// Two-Export convenience overload: hands the whole job to
// transferAndFillComplete(), passing 'rowExporter' as the second argument
// and 'domainExporter' (wrapped in an RCP) as the third.
//
// NOTE(review): the declarator lines between "void" and the first visible
// parameter (class qualifier, method name, and the 'destGraph' parameter
// that the body refers to) are absent from this listing. Presumably this
// is CrsGraph::exportAndFillComplete -- confirm against the class
// declaration.
//
// rowExporter     Export object forwarded as the second argument.
// domainExporter  Export object forwarded, via an RCP, as the third argument.
// domainMap       Forwarded unchanged.
// rangeMap        Forwarded unchanged.
// params          Forwarded unchanged.
//
// The method is const-qualified; it has no return value.
7329  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7330  void
7333  const export_type& rowExporter,
7334  const export_type& domainExporter,
7335  const Teuchos::RCP<const map_type>& domainMap,
7336  const Teuchos::RCP<const map_type>& rangeMap,
7337  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7338  {
// rcpFromRef turns the caller's reference into an RCP for the callee.
// NOTE(review): per the Teuchos documentation this RCP should be
// non-owning, so 'domainExporter' must outlive the call -- confirm.
7339  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7340  }
7341 
7342 
// Exchanges state between this object and 'graph' by calling std::swap on
// each data member named below, one pair at a time. Nothing is returned.
//
// NOTE(review): the declarator lines between "void" and the opening brace
// (class qualifier, method name, and the 'graph' parameter that the body
// refers to) are absent from this listing. Presumably this is
// CrsGraph::swap -- confirm against the class declaration.
//
// NOTE(review): only the members listed here are exchanged. When a data
// member is added to the class, check whether it also needs an entry here.
7343  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7344  void
7347  {
7348  std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7349 
// The four Maps: row, column, range, domain.
7350  std::swap(graph.rowMap_, this->rowMap_);
7351  std::swap(graph.colMap_, this->colMap_);
7352  std::swap(graph.rangeMap_, this->rangeMap_);
7353  std::swap(graph.domainMap_, this->domainMap_);
7354 
// Importer and exporter members.
7355  std::swap(graph.importer_, this->importer_);
7356  std::swap(graph.exporter_, this->exporter_);
7357 
// Entry-count members (per-process maximum, global total, global maximum).
7358  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7359 
7360  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7361  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7362 
7363  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7364 
// Row-pointer members: packed and unpacked, device and host variants,
// plus the packedUnpackedRowPtrsMatch_ flag that accompanies them.
7365  std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7366  std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7367 
7368  std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7369  std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7370  std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
7371 
7372  std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7373 
// Column-index storage: unpacked local, global, and packed local.
7374  std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7375  std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7376  std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7377 
7378  std::swap(graph.storageStatus_, this->storageStatus_);
7379 
// State flags.
7380  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7381  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7382  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7383  std::swap(graph.fillComplete_, this->fillComplete_);
7384  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7385  std::swap(graph.noRedundancies_, this->noRedundancies_);
7386  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7387  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7388  std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7389 
7390  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7391 
// Per-row allocation/entry counts and the nonlocal-entry map; the trailing
// comments on each line record the container kind.
7392  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7393  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7394  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7395  }
7396 
7397 
7398  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7399  bool
7402  {
7403  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7404  bool output = true;
7405  output = m1.size() == m2.size() ? output : false;
7406  for(auto & it_m: m1)
7407  {
7408  size_t key = it_m.first;
7409  output = m2.find(key) != m2.end() ? output : false;
7410  if(output)
7411  {
7412  auto v1 = m1.find(key)->second;
7413  auto v2 = m2.find(key)->second;
7414  std::sort(v1.begin(), v1.end());
7415  std::sort(v2.begin(), v2.end());
7416 
7417  output = v1.size() == v2.size() ? output : false;
7418  for(size_t i=0; output && i<v1.size(); i++)
7419  {
7420  output = v1[i]==v2[i] ? output : false;
7421  }
7422  }
7423  }
7424  return output;
7425  };
7426 
7427  bool output = true;
7428 
7429  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7430  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7431  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7432  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7433 
7434  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7435 
7436  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7437  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7438 
7439  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7440 
7441  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7442 
7443  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7444  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7445  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7446  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7447  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7448  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7449  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7450  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7451  output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7452  output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
7453 
7454  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7455  // nonlocals_ isa std::map<GO, std::vector<GO> >
7456  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7457 
7458  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7459  // - since this is a HostMirror type, it should be in host memory already
7460  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7461  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7462  {
7463  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7464  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7465  }
7466 
7467  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7468  // - since this is a HostMirror type, it should be in host memory already
7469  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7470  if(output && this->k_numRowEntries_.extent(0) > 0)
7471  {
7472  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7473  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7474  }
7475 
7476  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7477  {
7478  auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7479  auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7480  output = rowPtrsThis .extent(0) == rowPtrsGraph.extent(0) ? output : false;
7481  for(size_t i=0; output && i< rowPtrsThis.extent(0); i++)
7482  output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7483  }
7484 
7485  // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7486  output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7487  if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7488  {
7489  auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7490  auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7491  for(size_t i=0; output && i < indThis.extent(0); i++)
7492  output = indThis(i) == indGraph(i) ? output : false;
7493  }
7494 
7495  // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7496  output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7497  if(output && this->gblInds_wdv.extent(0) > 0)
7498  {
7499  auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7500  auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7501  for(size_t i=0; output && i<indtThis.extent(0); i++)
7502  output = indtThis(i) == indtGraph(i) ? output : false;
7503  }
7504 
7505  // Check lclGraph_ isa
7506  // KokkosSparse::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7507  // KokkosSparse::StaticCrsGraph has 3 data members in it:
7508  // Kokkos::View<size_type*, ...> row_map
7509  // (local_graph_device_type::row_map_type)
7510  // Kokkos::View<data_type*, ...> entries
7511  // (local_graph_device_type::entries_type)
7512  // Kokkos::View<size_type*, ...> row_block_offsets
7513  // (local_graph_device_type::row_block_type)
7514  // There is currently no KokkosSparse::StaticCrsGraph comparison function
7515  // that's built-in, so we will just compare
7516  // the three data items here. This can be replaced if Kokkos ever
7517  // puts in its own comparison routine.
7518  local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7519  local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7520 
7521  output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7522  ? output : false;
7523  if(output && thisLclGraph.row_map.extent(0) > 0)
7524  {
7525  auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7526  auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7527  for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7528  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7529  ? output : false;
7530  }
7531 
7532  output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7533  ? output : false;
7534  if(output && thisLclGraph.entries.extent(0) > 0)
7535  {
7536  auto lclGraph_entries_host_this = thisLclGraph.entries;
7537  auto lclGraph_entries_host_graph = graphLclGraph.entries;
7538  for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7539  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7540  ? output : false;
7541  }
7542 
7543  output =
7544  thisLclGraph.row_block_offsets.extent(0) ==
7545  graphLclGraph.row_block_offsets.extent(0) ? output : false;
7546  if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7547  {
7548  auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7549  auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7550  for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7551  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7552  ? output : false;
7553  }
7554 
7555  // For Importer and Exporter, we don't need to explicitly check them since
7556  // they will be consistent with the maps.
7557  // Note: importer_ isa Teuchos::RCP<const import_type>
7558  // exporter_ isa Teuchos::RCP<const export_type>
7559 
7560  return output;
7561  }
7562 
7563 
7564 
7565 } // namespace Tpetra
7566 
7567 //
7568 // Explicit instantiation macros
7569 //
7570 // Must be expanded from within the Tpetra namespace!
7571 //
7572 // Expands to a `template<>` declaration of importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE> (single Import: importer).
7573 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7574  template<> \
7575  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7576  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7577  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7578  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7579  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7580  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7581  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7582  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7583  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7584  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7585  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7586  const Teuchos::RCP<Teuchos::ParameterList>& params);
7587 // Two-Import variant (rowImporter, domainImporter): expands to a `template<>` declaration of importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>.
7588 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7589  template<> \
7590  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7591  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7592  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7593  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7594  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7595  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7596  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7597  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7598  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7599  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7600  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7601  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7602  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7603  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7604  const Teuchos::RCP<Teuchos::ParameterList>& params);
7605 
7606 // Expands to a `template<>` declaration of exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE> (single Export: exporter).
7607 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7608  template<> \
7609  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7610  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7611  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7612  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7613  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7614  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7615  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7616  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7617  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7618  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7619  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7620  const Teuchos::RCP<Teuchos::ParameterList>& params);
7621 // Two-Export variant (rowExporter, domainExporter): expands to a `template<>` declaration of exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>.
7622 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7623  template<> \
7624  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7625  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7626  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7627  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7628  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7629  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7630  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7631  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7632  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7633  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7634  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7635  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7636  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7637  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7638  const Teuchos::RCP<Teuchos::ParameterList>& params);
7639 
7640 // Explicitly instantiates class CrsGraph<LO, GO, NODE>, then expands the four import/export-and-fill-complete macros for the same types.
7641 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7642  template class CrsGraph<LO, GO, NODE>; \
7643  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7644  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7645  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7646  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7647 
7648 
7649 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
An abstract interface for graphs accessed by rows.
Kokkos::View< const size_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Declaration of a function that prints strings from each process.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph: same Maps, status flags, allocation counts, and row-pointer/column-index contents.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
const row_ptrs_host_view_type & getRowPtrsPackedHost() const
Get the packed row pointers on host. Lazily make a copy from device.
static bool debug()
Whether Tpetra is in debug mode.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
std::string description() const override
Return a one-line human-readable description of this object.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
size_t global_size_t
Global size_t object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class' Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void checkInternalState() const
Throw an exception if the internal state is not consistent.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
Utility functions for packing and unpacking sparse matrix entries.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Abstract base class for objects that can be the source of an Import or Export operation.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
const row_ptrs_device_view_type & getRowPtrsPackedDevice() const
Get the packed row pointers on device.
A parallel distribution of indices over processes.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
const row_ptrs_device_view_type & getRowPtrsUnpackedDevice() const
Get the unpacked row pointers on device.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
typename Node::device_type device_type
This class' Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Stand-alone utility functions and macros.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given object.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.