Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPH_DEF_HPP
11 #define TPETRA_CRSGRAPH_DEF_HPP
12 
15 
20 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
21 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
22 #include "Tpetra_Details_makeColMap.hpp"
26 #include "Tpetra_Distributor.hpp"
27 #include "Teuchos_SerialDenseMatrix.hpp"
28 #include "Tpetra_Vector.hpp"
29 #include "Tpetra_Import_Util.hpp"
30 #include "Tpetra_Import_Util2.hpp"
31 #include "Tpetra_Details_packCrsGraph.hpp"
32 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
33 #include "Tpetra_Details_CrsPadding.hpp"
34 #include "Tpetra_Util.hpp"
35 #include <algorithm>
36 #include <limits>
37 #include <map>
38 #include <sstream>
39 #include <string>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
44 namespace Tpetra {
45 namespace Details {
46 namespace Impl {
47 
// Print a range of (key, array) pairs to `out`, formatted as
// "<mapName>: {(key, <array>), (key, <array>), ...}".
//
// out:     stream that receives the text.
// beg/end: iterator range; each element must provide .first and .second.
// numEnt:  number of entries the caller says the range contains.
// mapName: label written before the opening brace.
//
// NOTE(review): this listing skips source lines 55 and 59, so the
// declaration that brings verbosePrintArray into scope and the initializer
// of maxNumToPrint are not visible here.
48 template <class MapIter>
49 void verbosePrintMap(std::ostream& out,
50  MapIter beg,
51  MapIter end,
52  const size_t numEnt,
53  const char mapName[]) {
54  using ::Tpetra::Details::Behavior;
56 
57  out << mapName << ": {";
58  const size_t maxNumToPrint =
60  if (maxNumToPrint == 0) {
 // Printing is disabled: only signal that entries exist.
61  if (numEnt != 0) {
62  out << "...";
63  }
64  } else {
 // Cap the number of separators at the smaller of numEnt and maxNumToPrint.
65  const size_t numToPrint = numEnt > maxNumToPrint ? maxNumToPrint : numEnt;
66  size_t count = 0;
 // NOTE(review): the loop runs all the way to `end`; numToPrint only
 // decides where the ", " separators stop, it does not stop the loop.
 // Confirm that this is intended.
67  for (MapIter it = beg; it != end; ++it) {
68  out << "(" << (*it).first << ", ";
69  verbosePrintArray(out, (*it).second, "gblColInds",
70  maxNumToPrint);
71  out << ")";
72  if (count + size_t(1) < numToPrint) {
73  out << ", ";
74  }
75  ++count;
76  }
 // Fewer pairs visited than announced: mark the output as truncated.
77  if (count < numEnt) {
78  out << ", ...";
79  }
80  }
81  out << "}";
82 }
83 
84 template <class LO, class GO, class Node>
85 Teuchos::ArrayView<GO>
86 getRowGraphGlobalRow(
87  std::vector<GO>& gblColIndsStorage,
88  const RowGraph<LO, GO, Node>& graph,
89  const GO gblRowInd) {
90  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
91  if (gblColIndsStorage.size() < origNumEnt) {
92  gblColIndsStorage.resize(origNumEnt);
93  }
94  typename CrsGraph<LO, GO, Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
95  origNumEnt);
96  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
97  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(), origNumEnt);
98  return retval;
99 }
100 
// Kokkos reduction functor that converts column indices from global to
// local, row by row, and counts how many could not be converted.
//
// Template parameters:
//   LO/GO:      local / global ordinal types.
//   DT:         device type used by all Views and by the local column Map.
//   OffsetType: type of the row offsets, and of the returned count.
//   NumEntType: type of the per-row entry counts.
//
// NOTE(review): this listing skips source line 155; the declaration of the
// lclColMap_ member (initialized in the constructor and used in
// operator()) is therefore not visible here.
101 template <class LO, class GO, class DT, class OffsetType, class NumEntType>
102 class ConvertColumnIndicesFromGlobalToLocal {
103  public:
 // Store the (shallow) View handles and the local column Map.
104  ConvertColumnIndicesFromGlobalToLocal(const ::Kokkos::View<LO*, DT>& lclColInds,
105  const ::Kokkos::View<const GO*, DT>& gblColInds,
106  const ::Kokkos::View<const OffsetType*, DT>& ptr,
107  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
108  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
109  : lclColInds_(lclColInds)
110  , gblColInds_(gblColInds)
111  , ptr_(ptr)
112  , lclColMap_(lclColMap)
113  , numRowEnt_(numRowEnt) {}
114 
 // Convert the entries of one row.  Entry j of row lclRow lives at
 // position ptr_(lclRow) + j in both index arrays.  curNumBad is
 // incremented once per entry whose local index comes back as invalid.
115  KOKKOS_FUNCTION void
116  operator()(const LO& lclRow, OffsetType& curNumBad) const {
117  const OffsetType offset = ptr_(lclRow);
118  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
119  // of entries in a row to LO, as long as the row doesn't have
120  // too many duplicate entries.
121  const LO numEnt = static_cast<LO>(numRowEnt_(lclRow));
122  for (LO j = 0; j < numEnt; ++j) {
123  const GO gid = gblColInds_(offset + j);
124  const LO lid = lclColMap_.getLocalElement(gid);
 // The result is stored even when it is the invalid value.
125  lclColInds_(offset + j) = lid;
126  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid()) {
127  ++curNumBad;
128  }
129  }
130  }
131 
 // Build the functor and run it as a parallel_reduce over all rows.
 // The number of rows is ptr.extent(0) - 1, or 0 if ptr is empty.
 // Returns the total number of entries that converted to invalid.
132  static OffsetType
133  run(const ::Kokkos::View<LO*, DT>& lclColInds,
134  const ::Kokkos::View<const GO*, DT>& gblColInds,
135  const ::Kokkos::View<const OffsetType*, DT>& ptr,
136  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
137  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) {
138  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
139  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
140 
141  const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : static_cast<LO>(ptr.extent(0) - 1);
142  OffsetType numBad = 0;
143  // Count of "bad" column indices is a reduction over rows.
144  ::Kokkos::parallel_reduce(range_type(0, lclNumRows),
145  functor_type(lclColInds, gblColInds, ptr,
146  lclColMap, numRowEnt),
147  numBad);
148  return numBad;
149  }
150 
151  private:
 // Output: local column indices.
152  ::Kokkos::View<LO*, DT> lclColInds_;
 // Input: global column indices.
153  ::Kokkos::View<const GO*, DT> gblColInds_;
 // Input: row offsets into the two index arrays.
154  ::Kokkos::View<const OffsetType*, DT> ptr_;
 // Input: number of entries to convert in each row.
156  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
157 };
158 
159 } // namespace Impl
160 
175 template <class LO, class GO, class DT, class OffsetType, class NumEntType>
176 OffsetType
177 convertColumnIndicesFromGlobalToLocal(const Kokkos::View<LO*, DT>& lclColInds,
178  const Kokkos::View<const GO*, DT>& gblColInds,
179  const Kokkos::View<const OffsetType*, DT>& ptr,
180  const LocalMap<LO, GO, DT>& lclColMap,
181  const Kokkos::View<const NumEntType*, DT>& numRowEnt) {
182  using Impl::ConvertColumnIndicesFromGlobalToLocal;
183  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
184  return impl_type::run(lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
185 }
186 
187 template <class ViewType, class LO>
188 class MaxDifference {
189  public:
190  MaxDifference(const ViewType& ptr)
191  : ptr_(ptr) {}
192 
193  KOKKOS_INLINE_FUNCTION void init(LO& dst) const {
194  dst = 0;
195  }
196 
197  KOKKOS_INLINE_FUNCTION void
198  join(LO& dst, const LO& src) const {
199  dst = (src > dst) ? src : dst;
200  }
201 
202  KOKKOS_INLINE_FUNCTION void
203  operator()(const LO lclRow, LO& maxNumEnt) const {
204  const LO numEnt = static_cast<LO>(ptr_(lclRow + 1) - ptr_(lclRow));
205  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
206  }
207 
208  private:
209  typename ViewType::const_type ptr_;
210 };
211 
212 template <class ViewType, class LO>
213 typename ViewType::non_const_value_type
214 maxDifference(const char kernelLabel[],
215  const ViewType& ptr,
216  const LO lclNumRows) {
217  if (lclNumRows == 0) {
218  // mfh 07 May 2018: Weirdly, I need this special case,
219  // otherwise I get the wrong answer.
220  return static_cast<LO>(0);
221  } else {
222  using execution_space = typename ViewType::execution_space;
223  using range_type = Kokkos::RangePolicy<execution_space, LO>;
224  LO theMaxNumEnt{0};
225  Kokkos::parallel_reduce(kernelLabel,
226  range_type(0, lclNumRows),
227  MaxDifference<ViewType, LO>(ptr),
228  theMaxNumEnt);
229  return theMaxNumEnt;
230  }
231 }
232 
233 } // namespace Details
234 
235 template <class LocalOrdinal, class GlobalOrdinal, class Node>
236 bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
237  getDebug() {
238  return Details::Behavior::debug("CrsGraph");
239 }
240 
241 template <class LocalOrdinal, class GlobalOrdinal, class Node>
242 bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
243  getVerbose() {
244  return Details::Behavior::verbose("CrsGraph");
245 }
246 
// Constructor taking a row Map and one allocation hint for all rows.
//
// rowMap:              stored as rowMap_ and passed to the base class.
// maxNumEntriesPerRow: stored as numAllocForAllRows_; must not be
//                      Teuchos::OrdinalTraits<size_t>::invalid().
// params:              forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if the hint is the invalid value.
//
// NOTE(review): this listing skips source line 264, so a statement after
// resumeFill(params) may not be shown.
247 template <class LocalOrdinal, class GlobalOrdinal, class Node>
248 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
249  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
250  const size_t maxNumEntriesPerRow,
251  const Teuchos::RCP<Teuchos::ParameterList>& params)
252  : dist_object_type(rowMap)
253  , rowMap_(rowMap)
254  , numAllocForAllRows_(maxNumEntriesPerRow) {
 // Prefix used by the exception macro below.
255  const char tfecfFuncName[] =
256  "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
257  staticAssertions();
258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
259  std::invalid_argument,
260  "The allocation hint maxNumEntriesPerRow must be "
261  "a valid size_t value, which in this case means it must not be "
262  "Teuchos::OrdinalTraits<size_t>::invalid().");
263  resumeFill(params);
265 }
266 
// Constructor taking a row Map, a column Map, and one allocation hint for
// all rows.
//
// rowMap:              stored as rowMap_ and passed to the base class.
// colMap:              stored as colMap_.
// maxNumEntriesPerRow: stored as numAllocForAllRows_; must not be
//                      Teuchos::OrdinalTraits<size_t>::invalid().
// params:              forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if the hint is the invalid value.
//
// NOTE(review): this listing skips source lines 268 and 287: the class
// qualifier before the constructor name, and possibly a statement after
// resumeFill(params), are not shown.
267 template <class LocalOrdinal, class GlobalOrdinal, class Node>
269  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
270  const Teuchos::RCP<const map_type>& colMap,
271  const size_t maxNumEntriesPerRow,
272  const Teuchos::RCP<Teuchos::ParameterList>& params)
273  : dist_object_type(rowMap)
274  , rowMap_(rowMap)
275  , colMap_(colMap)
276  , numAllocForAllRows_(maxNumEntriesPerRow) {
 // Prefix used by the exception macro below.
277  const char tfecfFuncName[] =
278  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
279  staticAssertions();
280  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
281  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
282  std::invalid_argument,
283  "The allocation hint maxNumEntriesPerRow must be "
284  "a valid size_t value, which in this case means it must not be "
285  "Teuchos::OrdinalTraits<size_t>::invalid().");
286  resumeFill(params);
288 }
289 
// Constructor taking a row Map and a per-row allocation hint given as a
// Teuchos::ArrayView.
//
// rowMap:       stored as rowMap_ and passed to the base class.
// numEntPerRow: one count per local row; its length must equal the local
//               number of rows of rowMap (0 if rowMap is null).  The
//               values are deep-copied into k_numAllocPerRow_.
// params:       forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) on a length mismatch and, when
// debug_ is set, on any entry equal to
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): this listing skips source lines 291 and 337: the class
// qualifier before the constructor name, and possibly a statement after
// resumeFill(params), are not shown.
290 template <class LocalOrdinal, class GlobalOrdinal, class Node>
292  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
293  const Teuchos::ArrayView<const size_t>& numEntPerRow,
294  const Teuchos::RCP<Teuchos::ParameterList>& params)
295  : dist_object_type(rowMap)
296  , rowMap_(rowMap)
297  , numAllocForAllRows_(0) {
 // Prefix used by the exception macros below.
298  const char tfecfFuncName[] =
299  "CrsGraph(rowMap,numEntPerRow,params): ";
300  staticAssertions();
301 
 // A null row Map is treated as having zero local rows.
302  const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
303  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
304  static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
305  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
306  "the input row Map.");
307 
 // The per-entry validity scan runs only in debug mode.
308  if (debug_) {
309  for (size_t r = 0; r < lclNumRows; ++r) {
310  const size_t curRowCount = numEntPerRow[r];
311  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
312  std::invalid_argument, "numEntPerRow(" << r << ") "
313  "specifies an invalid number of entries "
314  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
315  }
316  }
317 
318  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
319  // The latter is a const View, so we have to copy into a nonconst
320  // View first, then assign.
321  typedef decltype(k_numAllocPerRow_) out_view_type;
322  typedef typename out_view_type::non_const_type nc_view_type;
 // Unmanaged host View that wraps the caller's array without owning it.
323  typedef Kokkos::View<const size_t*,
324  typename nc_view_type::array_layout,
325  Kokkos::HostSpace,
326  Kokkos::MemoryUnmanaged>
327  in_view_type;
328  in_view_type numAllocPerRowIn(numEntPerRow.getRawPtr(), lclNumRows);
329  nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
330  lclNumRows);
331  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
332  using exec_space = typename nc_view_type::execution_space;
333  Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
334  k_numAllocPerRow_ = numAllocPerRowOut;
335 
336  resumeFill(params);
338 }
339 
// Constructor taking a row Map and a per-row allocation hint given as a
// Kokkos::DualView.
//
// rowMap:       stored as rowMap_ and passed to the base class.
// numEntPerRow: one count per local row; its extent must equal the local
//               number of rows of rowMap (0 if rowMap is null).  Its host
//               view is stored in k_numAllocPerRow_ (View assignment, no
//               element-wise copy is written here).
// params:       forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) on a length mismatch and, when
// debug_ is set, on any entry equal to
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): this listing skips source lines 341 and 370: the class
// qualifier before the constructor name, and possibly a statement after
// resumeFill(params), are not shown.
340 template <class LocalOrdinal, class GlobalOrdinal, class Node>
342  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
343  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
344  const Teuchos::RCP<Teuchos::ParameterList>& params)
345  : dist_object_type(rowMap)
346  , rowMap_(rowMap)
347  , k_numAllocPerRow_(numEntPerRow.view_host())
348  , numAllocForAllRows_(0) {
 // Prefix used by the exception macros below.
349  const char tfecfFuncName[] =
350  "CrsGraph(rowMap,numEntPerRow,params): ";
351  staticAssertions();
352 
 // A null row Map is treated as having zero local rows.
353  const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
354  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
355  static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
356  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
357  "the input row Map.");
358 
 // The per-entry validity scan runs only in debug mode and reads the
 // host side of the DualView.
359  if (debug_) {
360  for (size_t r = 0; r < lclNumRows; ++r) {
361  const size_t curRowCount = numEntPerRow.view_host()(r);
362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
363  std::invalid_argument, "numEntPerRow(" << r << ") "
364  "specifies an invalid number of entries "
365  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
366  }
367  }
368 
369  resumeFill(params);
371 }
372 
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Kokkos::DualView.
//
// rowMap:       stored as rowMap_ and passed to the base class.
// colMap:       stored as colMap_.
// numEntPerRow: one count per local row; its extent must equal the local
//               number of rows of rowMap (0 if rowMap is null).  Its host
//               view is stored in k_numAllocPerRow_.
// params:       forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) on a length mismatch and, when
// debug_ is set, on any entry equal to
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): this listing skips source lines 374 and 405: the class
// qualifier before the constructor name, and possibly a statement after
// resumeFill(params), are not shown.
373 template <class LocalOrdinal, class GlobalOrdinal, class Node>
375  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
376  const Teuchos::RCP<const map_type>& colMap,
377  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
378  const Teuchos::RCP<Teuchos::ParameterList>& params)
379  : dist_object_type(rowMap)
380  , rowMap_(rowMap)
381  , colMap_(colMap)
382  , k_numAllocPerRow_(numEntPerRow.view_host())
383  , numAllocForAllRows_(0) {
 // Prefix used by the exception macros below.
384  const char tfecfFuncName[] =
385  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
386  staticAssertions();
387 
 // A null row Map is treated as having zero local rows.
388  const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
389  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
390  static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
391  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
392  "the input row Map.");
393 
 // The per-entry validity scan runs only in debug mode and reads the
 // host side of the DualView.
394  if (debug_) {
395  for (size_t r = 0; r < lclNumRows; ++r) {
396  const size_t curRowCount = numEntPerRow.view_host()(r);
397  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
398  std::invalid_argument, "numEntPerRow(" << r << ") "
399  "specifies an invalid number of entries "
400  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
401  }
402  }
403 
404  resumeFill(params);
406 }
407 
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Teuchos::ArrayView.
//
// rowMap:       stored as rowMap_ and passed to the base class.
// colMap:       stored as colMap_.
// numEntPerRow: one count per local row; its length must equal the local
//               number of rows of rowMap (0 if rowMap is null).  The
//               values are deep-copied into k_numAllocPerRow_.
// params:       forwarded to resumeFill().
//
// Reports std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) on a length mismatch and, when
// debug_ is set, on any entry equal to
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): this listing skips source lines 409 and 457: the class
// qualifier before the constructor name, and possibly a statement after
// resumeFill(params), are not shown.
408 template <class LocalOrdinal, class GlobalOrdinal, class Node>
410  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
411  const Teuchos::RCP<const map_type>& colMap,
412  const Teuchos::ArrayView<const size_t>& numEntPerRow,
413  const Teuchos::RCP<Teuchos::ParameterList>& params)
414  : dist_object_type(rowMap)
415  , rowMap_(rowMap)
416  , colMap_(colMap)
417  , numAllocForAllRows_(0) {
 // Prefix used by the exception macros below.
418  const char tfecfFuncName[] =
419  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
420  staticAssertions();
421 
 // A null row Map is treated as having zero local rows.
422  const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
423  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
424  static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
425  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
426  "the input row Map.");
427 
 // The per-entry validity scan runs only in debug mode.
428  if (debug_) {
429  for (size_t r = 0; r < lclNumRows; ++r) {
430  const size_t curRowCount = numEntPerRow[r];
431  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
432  std::invalid_argument, "numEntPerRow(" << r << ") "
433  "specifies an invalid number of entries "
434  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
435  }
436  }
437 
438  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
439  // The latter is a const View, so we have to copy into a nonconst
440  // View first, then assign.
441  typedef decltype(k_numAllocPerRow_) out_view_type;
442  typedef typename out_view_type::non_const_type nc_view_type;
 // Unmanaged host View that wraps the caller's array without owning it.
443  typedef Kokkos::View<const size_t*,
444  typename nc_view_type::array_layout,
445  Kokkos::HostSpace,
446  Kokkos::MemoryUnmanaged>
447  in_view_type;
448  in_view_type numAllocPerRowIn(numEntPerRow.getRawPtr(), lclNumRows);
449  nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
450  lclNumRows);
451  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
452  using exec_space = typename nc_view_type::execution_space;
453  Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
454  k_numAllocPerRow_ = numAllocPerRowOut;
455 
456  resumeFill(params);
458 }
459 
// Constructor that builds a graph over the first rows of an existing
// graph ("originalGraph"), using subviews of its row offsets and index
// arrays rather than copying them element by element here.
//
// rowMap: row Map of the new graph; its local element count decides how
//         many leading rows of originalGraph are used.
// params: not referenced in the visible part of the body.
//
// The column Map, allocation hint, storage status, and the
// allocated/local/sorted flags are taken from originalGraph.
//
// NOTE(review): this listing skips source lines 461-462 and 489.  The
// class qualifier, the constructor name, and the declaration of the
// originalGraph parameter are not shown, and a final statement may be
// missing as well.
460 template <class LocalOrdinal, class GlobalOrdinal, class Node>
463  const Teuchos::RCP<const map_type>& rowMap,
464  const Teuchos::RCP<Teuchos::ParameterList>& params)
465  : dist_object_type(rowMap)
466  , rowMap_(rowMap)
467  , colMap_(originalGraph.colMap_)
468  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
469  , storageStatus_(originalGraph.storageStatus_)
470  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
471  , indicesAreLocal_(originalGraph.indicesAreLocal_)
472  , indicesAreSorted_(originalGraph.indicesAreSorted_) {
473  staticAssertions();
474 
 // NOTE(review): getLocalNumElements() is used as a size_t elsewhere in
 // this file; storing it in an int narrows the value — confirm intended.
475  int numRows = rowMap->getLocalNumElements();
 // Offset one past the last used row = number of entries to keep.
476  size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
 // numRows rows need numRows + 1 offsets.
477  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows + 1);
478 
479  this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
480  this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
481 
 // Take the leading numNonZeros entries of whichever index storage
 // (local or global) the original graph uses.
482  if (indicesAreLocal_) {
483  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
484  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
485  } else {
486  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
487  }
488 
490 }
491 
// Constructor taking row and column Maps plus the graph structure as
// Kokkos row offsets and local column indices.
//
// rowMap:        stored as rowMap_ and passed to the base class.
// colMap:        stored as colMap_.
// rowPointers:   row offsets, passed to setAllIndices().
// columnIndices: local column indices, passed to setAllIndices().
// params:        if non-null and it contains a bool "sorted" set to
//                false, indices are marked unsorted; otherwise sorted.
//
// The graph is marked as 1-D packed storage with allocated, local indices.
//
// NOTE(review): this listing skips source lines 493 and 514: the class
// qualifier before the constructor name, and possibly a statement after
// setAllIndices(), are not shown.
492 template <class LocalOrdinal, class GlobalOrdinal, class Node>
494  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
495  const Teuchos::RCP<const map_type>& colMap,
496  const typename local_graph_device_type::row_map_type& rowPointers,
497  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
498  const Teuchos::RCP<Teuchos::ParameterList>& params)
499  : dist_object_type(rowMap)
500  , rowMap_(rowMap)
501  , colMap_(colMap)
502  , numAllocForAllRows_(0)
503  , storageStatus_(Details::STORAGE_1D_PACKED)
504  , indicesAreAllocated_(true)
505  , indicesAreLocal_(true) {
506  staticAssertions();
 // Indices count as sorted unless the caller explicitly says otherwise.
507  if (!params.is_null() && params->isParameter("sorted") &&
508  !params->get<bool>("sorted")) {
509  indicesAreSorted_ = false;
510  } else {
511  indicesAreSorted_ = true;
512  }
513  setAllIndices(rowPointers, columnIndices);
515 }
516 
// Constructor taking row and column Maps plus the graph structure as
// Teuchos::ArrayRCP row offsets and local column indices.
//
// rowMap:        stored as rowMap_ and passed to the base class.
// colMap:        stored as colMap_.
// rowPointers:   row offsets, passed to setAllIndices().
// columnIndices: local column indices, passed to setAllIndices().
// params:        if non-null and it contains a bool "sorted" set to
//                false, indices are marked unsorted; otherwise sorted.
//
// The graph is marked as 1-D packed storage with allocated, local indices.
//
// NOTE(review): this listing skips source lines 518 and 539: the class
// qualifier before the constructor name, and possibly a statement after
// setAllIndices(), are not shown.
517 template <class LocalOrdinal, class GlobalOrdinal, class Node>
519  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
520  const Teuchos::RCP<const map_type>& colMap,
521  const Teuchos::ArrayRCP<size_t>& rowPointers,
522  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
523  const Teuchos::RCP<Teuchos::ParameterList>& params)
524  : dist_object_type(rowMap)
525  , rowMap_(rowMap)
526  , colMap_(colMap)
527  , numAllocForAllRows_(0)
528  , storageStatus_(Details::STORAGE_1D_PACKED)
529  , indicesAreAllocated_(true)
530  , indicesAreLocal_(true) {
531  staticAssertions();
 // Indices count as sorted unless the caller explicitly says otherwise.
532  if (!params.is_null() && params->isParameter("sorted") &&
533  !params->get<bool>("sorted")) {
534  indicesAreSorted_ = false;
535  } else {
536  indicesAreSorted_ = true;
537  }
538  setAllIndices(rowPointers, columnIndices);
540 }
541 
// Constructor taking row and column Maps and a local (Kokkos) graph.
//
// Delegates to the constructor that additionally takes domain and range
// Maps, passing Teuchos::null for both.
//
// NOTE(review): this listing skips source line 543, so the class
// qualifier before the constructor name is not shown.
542 template <class LocalOrdinal, class GlobalOrdinal, class Node>
544  CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
545  const Teuchos::RCP<const map_type>& colMap,
546  const local_graph_device_type& k_local_graph_,
547  const Teuchos::RCP<Teuchos::ParameterList>& params)
548  : CrsGraph(k_local_graph_,
549  rowMap,
550  colMap,
551  Teuchos::null,
552  Teuchos::null,
553  params) {}
554 
// Constructor taking a local (Kokkos) graph and the four Maps; the
// resulting graph is marked fill complete.
//
// k_local_graph_: local graph; its entries and row_map are adopted.
// rowMap:         row Map; must have as many local elements as the local
//                 graph has rows.
// colMap:         column Map; must be non-null.
// domainMap:      domain Map; rowMap_ is used if null.
// rangeMap:       range Map; rowMap_ is used if null.
// params:         optional.  "sorted" (bool) == false marks the indices
//                 unsorted; "compute global constants" (default true)
//                 controls the call to computeGlobalConstants().
//
// Reports std::runtime_error for a null column Map or a row-count
// mismatch, and std::logic_error if index storage is already allocated
// (all through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC).
//
// NOTE(review): this listing skips source lines 556 and 609: the class
// qualifier before the constructor name, and a statement between the
// assignment of lclIndsPacked_wdv and setRowPtrs(), are not shown.
555 template <class LocalOrdinal, class GlobalOrdinal, class Node>
557  CrsGraph(const local_graph_device_type& k_local_graph_,
558  const Teuchos::RCP<const map_type>& rowMap,
559  const Teuchos::RCP<const map_type>& colMap,
560  const Teuchos::RCP<const map_type>& domainMap,
561  const Teuchos::RCP<const map_type>& rangeMap,
562  const Teuchos::RCP<Teuchos::ParameterList>& params)
563  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type>(rowMap)
564  , rowMap_(rowMap)
565  , colMap_(colMap)
566  , numAllocForAllRows_(0)
567  , storageStatus_(Details::STORAGE_1D_PACKED)
568  , indicesAreAllocated_(true)
569  , indicesAreLocal_(true) {
570  staticAssertions();
 // This prefix has no trailing ": "; the messages below start with ": ".
571  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
572 
573  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
574  colMap.is_null(), std::runtime_error,
575  ": The input column Map must be nonnull.");
576  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
577  k_local_graph_.numRows() != rowMap->getLocalNumElements(),
578  std::runtime_error,
579  ": The input row Map and the input local graph need to have the same "
580  "number of rows. The row Map claims "
581  << rowMap->getLocalNumElements()
582  << " row(s), but the local graph claims " << k_local_graph_.numRows()
583  << " row(s).");
584 
585  // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
586  // rowMap_->getLocalNumElements(), but it doesn't have to.
587  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
588  // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
589  // ": The input row Map and the input local graph need to have the same "
590  // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
591  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
592  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
593  lclIndsUnpacked_wdv.extent(0) != 0 || gblInds_wdv.extent(0) != 0, std::logic_error,
594  ": cannot have 1D data structures allocated.");
595 
 // Indices count as sorted unless the caller explicitly says otherwise.
596  if (!params.is_null() && params->isParameter("sorted") &&
597  !params->get<bool>("sorted")) {
598  indicesAreSorted_ = false;
599  } else {
600  indicesAreSorted_ = true;
601  }
602 
 // Null domain/range Maps fall back to the row Map.
603  setDomainRangeMaps(domainMap.is_null() ? rowMap_ : domainMap,
604  rangeMap.is_null() ? rowMap_ : rangeMap);
605  Teuchos::Array<int> remotePIDs(0); // unused output argument
606  this->makeImportExport(remotePIDs, false);
607 
608  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
610  this->setRowPtrs(k_local_graph_.row_map);
611 
612  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
613 
 // Global constants are computed unless params turns them off.
614  const bool callComputeGlobalConstants = params.get() == nullptr ||
615  params->get("compute global constants", true);
616 
617  if (callComputeGlobalConstants) {
618  this->computeGlobalConstants();
619  }
620  this->fillComplete_ = true;
621  this->checkInternalState();
622 }
623 
// Constructor taking a local graph ("lclGraph"), the four Maps, and
// ready-made Import/Export objects; the resulting graph is marked fill
// complete.
//
// rowMap:    stored as rowMap_ and passed to the base class.
// colMap:    column Map; must be non-null.
// domainMap: domain Map; rowMap is used if null.
// rangeMap:  range Map; rowMap is used if null.
// importer:  stored as importer_.
// exporter:  stored as exporter_.
// params:    optional.  "sorted" (bool) == false marks the indices
//            unsorted; "compute global constants" (default true)
//            controls the call to computeGlobalConstants().
//
// Reports std::runtime_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if colMap is null.
//
// NOTE(review): this listing skips source lines 625-626, 654, and 673.
// The class qualifier, the constructor name, and the declaration of the
// lclGraph parameter are not shown; one statement before setRowPtrs() and
// one after fillComplete_ = true may be missing as well.
624 template <class LocalOrdinal, class GlobalOrdinal, class Node>
627  const Teuchos::RCP<const map_type>& rowMap,
628  const Teuchos::RCP<const map_type>& colMap,
629  const Teuchos::RCP<const map_type>& domainMap,
630  const Teuchos::RCP<const map_type>& rangeMap,
631  const Teuchos::RCP<const import_type>& importer,
632  const Teuchos::RCP<const export_type>& exporter,
633  const Teuchos::RCP<Teuchos::ParameterList>& params)
634  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type>(rowMap)
635  , rowMap_(rowMap)
636  , colMap_(colMap)
637  , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
638  , domainMap_(domainMap.is_null() ? rowMap : domainMap)
639  , importer_(importer)
640  , exporter_(exporter)
641  , numAllocForAllRows_(0)
642  , storageStatus_(Details::STORAGE_1D_PACKED)
643  , indicesAreAllocated_(true)
644  , indicesAreLocal_(true) {
645  staticAssertions();
 // Prefix used by the exception macro below.
646  const char tfecfFuncName[] =
647  "Tpetra::CrsGraph(local_graph_device_type,"
648  "Map,Map,Map,Map,Import,Export,params): ";
649 
650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
651  "The input column Map must be nonnull.");
652 
653  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
655  setRowPtrs(lclGraph.row_map);
656 
657  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
658 
 // Indices count as sorted unless the caller explicitly says otherwise.
659  if (!params.is_null() && params->isParameter("sorted") &&
660  !params->get<bool>("sorted")) {
661  indicesAreSorted_ = false;
662  } else {
663  indicesAreSorted_ = true;
664  }
665 
 // Global constants are computed unless params turns them off.
666  const bool callComputeGlobalConstants =
667  params.get() == nullptr ||
668  params->get("compute global constants", true);
669  if (callComputeGlobalConstants) {
670  this->computeGlobalConstants();
671  }
672  fillComplete_ = true;
674 }
675 
// Constructor taking row offsets, local column indices, the four Maps,
// and ready-made Import/Export objects; the resulting graph is marked
// fill complete.
//
// rowPointers:   row offsets, passed to setRowPtrs().
// columnIndices: local column indices, assigned to lclIndsPacked_wdv.
// rowMap:        stored as rowMap_ and passed to the base class.
// colMap:        column Map; must be non-null.
// domainMap:     domain Map; rowMap is used if null.
// rangeMap:      range Map; rowMap is used if null.
// importer:      stored as importer_.
// exporter:      stored as exporter_.
// params:        optional.  "sorted" (bool) == false marks the indices
//                unsorted; "compute global constants" (default true)
//                controls the call to computeGlobalConstants().
//
// Reports std::runtime_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if colMap is null.
//
// NOTE(review): this listing skips source lines 677, 707, and 726: the
// class qualifier before the constructor name is not shown, and one
// statement before setRowPtrs() and one after fillComplete_ = true may be
// missing.
676 template <class LocalOrdinal, class GlobalOrdinal, class Node>
678  CrsGraph(const row_ptrs_device_view_type& rowPointers,
679  const local_inds_wdv_type& columnIndices,
680  const Teuchos::RCP<const map_type>& rowMap,
681  const Teuchos::RCP<const map_type>& colMap,
682  const Teuchos::RCP<const map_type>& domainMap,
683  const Teuchos::RCP<const map_type>& rangeMap,
684  const Teuchos::RCP<const import_type>& importer,
685  const Teuchos::RCP<const export_type>& exporter,
686  const Teuchos::RCP<Teuchos::ParameterList>& params)
687  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type>(rowMap)
688  , rowMap_(rowMap)
689  , colMap_(colMap)
690  , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
691  , domainMap_(domainMap.is_null() ? rowMap : domainMap)
692  , importer_(importer)
693  , exporter_(exporter)
694  , numAllocForAllRows_(0)
695  , storageStatus_(Details::STORAGE_1D_PACKED)
696  , indicesAreAllocated_(true)
697  , indicesAreLocal_(true) {
698  staticAssertions();
 // NOTE(review): the two adjacent literals concatenate to
 // "...local_inds_wdv_typeMap,Map,..." — a comma appears to be missing
 // after "local_inds_wdv_type" in the reported function name.
699  const char tfecfFuncName[] =
700  "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
701  "Map,Map,Map,Map,Import,Export,params): ";
702 
703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
704  "The input column Map must be nonnull.");
705 
706  lclIndsPacked_wdv = columnIndices;
708  setRowPtrs(rowPointers);
709 
710  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
711 
 // Indices count as sorted unless the caller explicitly says otherwise.
712  if (!params.is_null() && params->isParameter("sorted") &&
713  !params->get<bool>("sorted")) {
714  indicesAreSorted_ = false;
715  } else {
716  indicesAreSorted_ = true;
717  }
718 
 // Global constants are computed unless params turns them off.
719  const bool callComputeGlobalConstants =
720  params.get() == nullptr ||
721  params->get("compute global constants", true);
722  if (callComputeGlobalConstants) {
723  this->computeGlobalConstants();
724  }
725  fillComplete_ = true;
727 }
728 
// Build and return a parameter list named "Tpetra::CrsGraph" that holds
// two sublists, "Import" and "Export", both filled with the defaults that
// a Distributor writes into its parameter list.
//
// NOTE(review): this listing skips source lines 731-732, so the class
// qualifier and the function name are not shown.  Presumably this is
// getValidParameters(), which setParameterList() calls — confirm against
// the header.
729 template <class LocalOrdinal, class GlobalOrdinal, class Node>
730 Teuchos::RCP<const Teuchos::ParameterList>
733  using Teuchos::ParameterList;
734  using Teuchos::parameterList;
735  using Teuchos::RCP;
736 
737  RCP<ParameterList> params = parameterList("Tpetra::CrsGraph");
738 
739  // Make a sublist for the Import.
740  RCP<ParameterList> importSublist = parameterList("Import");
741 
742  // FIXME (mfh 02 Apr 2012) We should really have the Import and
743  // Export objects fill in these lists. However, we don't want to
744  // create an Import or Export unless we need them. For now, we
745  // know that the Import and Export just pass the list directly to
746  // their Distributor, so we can create a Distributor here
747  // (Distributor's constructor is a lightweight operation) and have
748  // it fill in the list.
749 
750  // Fill in Distributor default parameters by creating a
751  // Distributor and asking it to do the work.
 // The Distributor object exists only for this side effect on
 // importSublist; rowMap_ is dereferenced here without a null check.
752  Distributor distributor(rowMap_->getComm(), importSublist);
753  params->set("Import", *importSublist, "How the Import performs communication.");
754 
755  // Make a sublist for the Export. For now, it's a clone of the
756  // Import sublist. It's not a shallow copy, though, since we
757  // might like the Import to do communication differently than the
758  // Export.
759  params->set("Export", *importSublist, "How the Export performs communication.");
760 
761  return params;
762 }
763 
// Validate `params` against the list returned by getValidParameters(),
// filling in defaults for missing entries, then store it with
// setMyParamList().
//
// params: list to validate and adopt; it is dereferenced without a null
//         check.
//
// NOTE(review): this listing skips source line 765, so the return type
// and class qualifier are not shown.
764 template <class LocalOrdinal, class GlobalOrdinal, class Node>
766  setParameterList(const Teuchos::RCP<Teuchos::ParameterList>& params) {
767  Teuchos::RCP<const Teuchos::ParameterList> validParams =
768  getValidParameters();
769  params->validateParametersAndSetDefaults(*validParams);
770  this->setMyParamList(params);
771 }
772 
// Return the global number of elements of the row Map.  rowMap_ is
// dereferenced without a null check.
//
// NOTE(review): this listing skips source lines 774-776, so the return
// type, class qualifier, and function name are not shown; presumably this
// is getGlobalNumRows() — confirm against the header.
773 template <class LocalOrdinal, class GlobalOrdinal, class Node>
777  return rowMap_->getGlobalNumElements();
778 }
779 
// Return the global number of elements of the domain Map.
//
// Reports std::runtime_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if the graph is not fill
// complete or the domain Map is null.
//
// NOTE(review): this listing skips source lines 781-783, so the return
// type, class qualifier, and function name are not shown; the name is
// taken from the tfecfFuncName string below.
780 template <class LocalOrdinal, class GlobalOrdinal, class Node>
784  const char tfecfFuncName[] = "getGlobalNumCols: ";
785  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
786  !isFillComplete() || getDomainMap().is_null(), std::runtime_error,
787  "The graph does not have a domain Map. You may not call this method in "
788  "that case.");
789  return getDomainMap()->getGlobalNumElements();
790 }
791 
// Return the local number of elements of the row Map, or 0 if the row
// Map is null.
//
// NOTE(review): this listing skips source lines 794-795, so the class
// qualifier and function name are not shown; presumably this is
// getLocalNumRows() — confirm against the header.
792 template <class LocalOrdinal, class GlobalOrdinal, class Node>
793 size_t
796  return this->rowMap_.is_null() ? static_cast<size_t>(0) : this->rowMap_->getLocalNumElements();
797 }
798 
// Return the local number of elements of the column Map.
//
// Reports std::runtime_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if the graph has no column Map.
//
// NOTE(review): this listing skips source lines 801-802, so the class
// qualifier and function name are not shown; the name is taken from the
// tfecfFuncName string below.
799 template <class LocalOrdinal, class GlobalOrdinal, class Node>
800 size_t
803  const char tfecfFuncName[] = "getLocalNumCols: ";
804  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
805  !hasColMap(), std::runtime_error,
806  "The graph does not have a column Map. You may not call this method "
807  "unless the graph has a column Map. This requires either that a custom "
808  "column Map was given to the constructor, or that fillComplete() has "
809  "been called.");
 // The null branch repeats the check above: hasColMap() in this file is
 // defined as !colMap_.is_null().
810  return colMap_.is_null() ? static_cast<size_t>(0) : colMap_->getLocalNumElements();
811 }
812 
// Return the row Map (rowMap_).
//
// NOTE(review): this listing skips source line 815, so the class
// qualifier before the function name is not shown.
813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
814 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
816  getRowMap() const {
817  return rowMap_;
818 }
819 
// Return the column Map (colMap_).
//
// NOTE(review): this listing skips source line 822, so the class
// qualifier before the function name is not shown.
820 template <class LocalOrdinal, class GlobalOrdinal, class Node>
821 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
823  getColMap() const {
824  return colMap_;
825 }
826 
// Return the domain Map (domainMap_).
//
// NOTE(review): this listing skips source line 829, so the class
// qualifier before the function name is not shown.
827 template <class LocalOrdinal, class GlobalOrdinal, class Node>
828 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
830  getDomainMap() const {
831  return domainMap_;
832 }
833 
// Return the range Map (rangeMap_).
//
// NOTE(review): this listing skips source line 836, so the class
// qualifier before the function name is not shown.
834 template <class LocalOrdinal, class GlobalOrdinal, class Node>
835 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
837  getRangeMap() const {
838  return rangeMap_;
839 }
840 
// Return the Import object (importer_).
//
// NOTE(review): this listing skips source line 843, so the class
// qualifier before the function name is not shown.
841 template <class LocalOrdinal, class GlobalOrdinal, class Node>
842 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
844  getImporter() const {
845  return importer_;
846 }
847 
// Return the Export object (exporter_).
//
// NOTE(review): this listing skips source line 850, so the class
// qualifier before the function name is not shown.
848 template <class LocalOrdinal, class GlobalOrdinal, class Node>
849 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
851  getExporter() const {
852  return exporter_;
853 }
854 
// Return true if the graph has a column Map, i.e. colMap_ is non-null.
//
// NOTE(review): this listing skips source line 856, so the return type
// and class qualifier are not shown.
855 template <class LocalOrdinal, class GlobalOrdinal, class Node>
857  hasColMap() const {
858  return !colMap_.is_null();
859 }
860 
// Return true when indices are allocated, k_numRowEntries_ has extent
// zero, and the graph has at least one local row.
//
// NOTE(review): this listing skips source lines 862-863, so the return
// type, class qualifier, and function name are not shown; presumably this
// is isStorageOptimized() — confirm against the header.
861 template <class LocalOrdinal, class GlobalOrdinal, class Node>
864  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
865  // getLocalNumRows() is zero?
866 
867  const bool isOpt = indicesAreAllocated_ &&
868  k_numRowEntries_.extent(0) == 0 &&
869  getLocalNumRows() > 0;
870 
871  return isOpt;
872 }
873 
// Return the stored global number of entries (globalNumEntries_).
//
// Reports std::logic_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if haveGlobalConstants_ is false.
//
// NOTE(review): this listing skips source lines 875-877, so the return
// type, class qualifier, and function name are not shown; the name is
// taken from the tfecfFuncName string below.
874 template <class LocalOrdinal, class GlobalOrdinal, class Node>
878  const char tfecfFuncName[] = "getGlobalNumEntries: ";
879  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
880  "The graph does not have global constants computed, "
881  "but the user has requested them.");
882 
883  return globalNumEntries_;
884 }
885 
// Return the number of entries stored on the calling process.
//
// Cases, in the order they are tested:
//   - indices not allocated, or zero local rows: 0;
//   - k_numRowEntries_ empty (packed storage): the extent of
//     lclIndsPacked_wdv, or 0 if the packed row offsets hold fewer than
//     lclNumRows + 1 entries;
//   - otherwise: the sum of k_numRowEntries_ over the first
//     min(lclNumRows, extent of k_numRowEntries_) rows.
//
// In debug mode the packed case also checks that the last packed row
// offset equals the length of lclIndsPacked_wdv and reports
// std::logic_error if not.
//
// NOTE(review): this listing skips source lines 888-889, so the class
// qualifier and function name are not shown; the name is taken from the
// tfecfFuncName string below.
886 template <class LocalOrdinal, class GlobalOrdinal, class Node>
887 size_t
890  const char tfecfFuncName[] = "getLocalNumEntries: ";
891  typedef LocalOrdinal LO;
892 
893  if (this->indicesAreAllocated_) {
 // NOTE(review): getLocalNumRows() returns size_t in this file; the
 // value is converted to LO here.
894  const LO lclNumRows = this->getLocalNumRows();
895  if (lclNumRows == 0) {
896  return static_cast<size_t>(0);
897  } else {
898  // Avoid the "*this capture" issue by creating a local Kokkos::View.
899  auto numEntPerRow = this->k_numRowEntries_;
900  const LO numNumEntPerRow = numEntPerRow.extent(0);
901  if (numNumEntPerRow == 0) {
 // Row offsets too short to describe lclNumRows rows: report zero.
902  if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
903  static_cast<LO>(lclNumRows + 1)) {
904  return static_cast<size_t>(0);
905  } else {
906  // indices are allocated and k_numRowEntries_ is not allocated,
907  // so we have packed storage and the length of lclIndsPacked_wdv
908  // must be the number of local entries.
909  if (debug_) {
910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
911  "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
912  }
913  return lclIndsPacked_wdv.extent(0);
914  }
915  } else { // k_numRowEntries_ is populated
916  // k_numRowEntries_ is actually a host View, so we run
917  // the sum in its native execution space. This also means
918  // that we can use explicit capture (which could perhaps
919  // improve build time) instead of KOKKOS_LAMBDA, and avoid
920  // any CUDA build issues with trying to run a __device__ -
921  // only function on host.
922  typedef typename num_row_entries_type::execution_space
923  host_exec_space;
924  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
925 
 // Never read past the end of either the rows or the counts.
926  const LO upperLoopBound = lclNumRows < numNumEntPerRow ? lclNumRows : numNumEntPerRow;
927  size_t nodeNumEnt = 0;
928  Kokkos::parallel_reduce(
929  "Tpetra::CrsGraph::getNumNodeEntries",
930  range_type(0, upperLoopBound),
931  [=](const LO& k, size_t& lclSum) {
932  lclSum += numEntPerRow(k);
933  },
934  nodeNumEnt);
935  return nodeNumEnt;
936  }
937  }
938  } else { // nothing allocated on this process, so no entries
939  return static_cast<size_t>(0);
940  }
941 }
942 
// Returns the cached global maximum number of entries in any row
// (globalMaxNumRowEntries_).
// Throws std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): listing lines 944-946 (return type and qualified function
// name) are absent from this extract; the name is taken from tfecfFuncName.
943 template <class LocalOrdinal, class GlobalOrdinal, class Node>
947  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
949  "The graph does not have global constants computed, "
950  "but the user has requested them.");
951 
952  return globalMaxNumRowEntries_;
953 }
954 
// Accessor: returns the cached per-process value nodeMaxNumRowEntries_
// without any validity check.
// NOTE(review): listing lines 957-958 (qualified function name, presumably
// getLocalMaxNumRowEntries) are absent from this extract -- confirm.
955 template <class LocalOrdinal, class GlobalOrdinal, class Node>
956 size_t
959  return nodeMaxNumRowEntries_;
960 }
961 
// Accessor: returns the fillComplete_ flag.
// NOTE(review): listing lines 963-964 (return type and qualified function
// name, presumably isFillComplete) are absent from this extract -- confirm.
962 template <class LocalOrdinal, class GlobalOrdinal, class Node>
965  return fillComplete_;
966 }
967 
// Returns the logical negation of the fillComplete_ flag.
968 template <class LocalOrdinal, class GlobalOrdinal, class Node>
970  isFillActive() const {
971  return !fillComplete_;
972 }
973 
// Accessor: returns the indicesAreLocal_ flag.
// NOTE(review): listing lines 975-976 (return type and qualified function
// name, presumably isLocallyIndexed) are absent from this extract -- confirm.
974 template <class LocalOrdinal, class GlobalOrdinal, class Node>
977  return indicesAreLocal_;
978 }
979 
// Accessor: returns the indicesAreGlobal_ flag.
// NOTE(review): listing lines 981-982 (return type and qualified function
// name, presumably isGloballyIndexed) are absent from this extract -- confirm.
980 template <class LocalOrdinal, class GlobalOrdinal, class Node>
983  return indicesAreGlobal_;
984 }
985 
// Reports the length of the index array currently allocated on this process.
//
// Returns Tpetra::Details::OrdinalTraits<size_t>::invalid() when indices are
// not allocated. Returns 0 when there are no local rows, when packed row
// offsets are shorter than lclNumRows + 1, when unpacked row offsets are
// empty, or when storageStatus_ is neither 1D packed nor 1D unpacked.
// Otherwise returns the extent of the local index array (packed or unpacked,
// matching storageStatus_) if locally indexed, else of gblInds_wdv.
//
// NOTE(review): listing lines 988-989 (qualified function name, presumably
// getLocalAllocationSize) are absent from this extract -- confirm.
986 template <class LocalOrdinal, class GlobalOrdinal, class Node>
987 size_t
990  typedef LocalOrdinal LO;
991 
992  if (this->indicesAreAllocated_) {
993  const LO lclNumRows = this->getLocalNumRows();
994  if (lclNumRows == 0) {
995  return static_cast<size_t>(0);
996  } else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
997  if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
998  static_cast<LO>(lclNumRows + 1)) {
999  return static_cast<size_t>(0);
1000  } else {
1001  if (this->isLocallyIndexed())
1002  return lclIndsPacked_wdv.extent(0);
1003  else
1004  return gblInds_wdv.extent(0);
1005  }
1006  } else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1007  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1008  if (rowPtrsUnpacked_host.extent(0) == 0) {
1009  return static_cast<size_t>(0);
1010  } else {
1011  if (this->isLocallyIndexed())
1012  return lclIndsUnpacked_wdv.extent(0);
1013  else
1014  return gblInds_wdv.extent(0);
1015  }
1016  } else {
1017  return static_cast<size_t>(0);
1018  }
1019  } else {
1020  return Tpetra::Details::OrdinalTraits<size_t>::invalid();
1021  }
1022 }
1023 
// Returns the communicator of the row Map, or Teuchos::null when rowMap_
// itself is null.
1024 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1025 Teuchos::RCP<const Teuchos::Comm<int>>
1027  getComm() const {
1028  return this->rowMap_.is_null() ? Teuchos::null : this->rowMap_->getComm();
1029 }
1030 
// Returns the index base reported by the row Map.
// NOTE(review): rowMap_ is dereferenced without a null check here, whereas
// getComm() in this file guards against a null rowMap_ -- confirm callers
// guarantee a non-null row Map.
1031 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1032 GlobalOrdinal
1034  getIndexBase() const {
1035  return rowMap_->getIndexBase();
1036 }
1037 
// Accessor: returns the indicesAreAllocated_ flag.
1038 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040  indicesAreAllocated() const {
1041  return indicesAreAllocated_;
1042 }
1043 
// Accessor: returns the indicesAreSorted_ flag.
1044 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1046  isSorted() const {
1047  return indicesAreSorted_;
1048 }
1049 
// Accessor: returns the noRedundancies_ flag.
1050 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1052  isMerged() const {
1053  return noRedundancies_;
1054 }
1055 
// Marks the graph as modified on this process: clears the sorted and
// no-redundancies flags and invalidates the cached local constants.
// NOTE(review): listing lines 1057-1058 (return type and qualified function
// name, presumably setLocallyModified) are absent from this extract.
1056 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1059  // FIXME (mfh 07 May 2013) How do we know that the change
1060  // introduced a redundancy, or even that it invalidated the sorted
1061  // order of indices? CrsGraph has always made this conservative
1062  // guess. It could be a bit costly to check at insertion time,
1063  // though.
1064  indicesAreSorted_ = false;
1065  noRedundancies_ = false;
1066 
1067  // We've modified the graph, so we'll have to recompute local
1068  // constants like the number of diagonal entries on this process.
1069  haveLocalConstants_ = false;
1070 }
1071 
// Allocates unpacked 1-D index storage for every local row.
//
// lg      - which kind of index storage to create: LocalIndices allocates
//           lclIndsUnpacked_wdv, anything else allocates gblInds_wdv.
// verbose - when true, progress messages are written to std::cerr.
//
// Row offsets are computed from k_numAllocPerRow_ when it has nonzero
// extent, otherwise from the uniform count numAllocForAllRows_. On success
// the offsets are committed via setRowPtrsUnpacked, storageStatus_ becomes
// STORAGE_1D_UNPACKED, k_numRowEntries_ is re-created and zero-filled (only
// when numRows > 0), the allocation hints are released, and
// indicesAreAllocated_ is set.
//
// Throws std::logic_error if the requested index kind contradicts the
// graph's current indexing, or if indices are already allocated;
// std::invalid_argument if the allocation hints are inconsistent. Any
// exception from checkInternalState() at the end is caught and re-thrown
// with added context.
//
// NOTE(review): listing lines 1073, 1075, 1143 and 1162 are absent from this
// extract (presumably the qualified name and using-declarations) -- confirm.
1072 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1074  allocateIndices(const ELocalGlobal lg, const bool verbose) {
1076  using std::endl;
1077  using Teuchos::arcp;
1078  using Teuchos::Array;
1079  using Teuchos::ArrayRCP;
1080  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1081  typedef typename local_graph_device_type::row_map_type::non_const_type
1082  non_const_row_map_type;
1083  const char tfecfFuncName[] = "allocateIndices: ";
1084  const char suffix[] =
1085  " Please report this bug to the Tpetra developers.";
1086  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1087 
1088  std::unique_ptr<std::string> prefix;
1089  if (verbose) {
1090  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1091  std::ostringstream os;
1092  os << *prefix << "Start: lg="
1093  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1094  << ", numRows: " << this->getLocalNumRows() << endl;
1095  std::cerr << os.str();
1096  }
1097 
1098  // This is a protected function, only callable by us. If it was
1099  // called incorrectly, it is our fault. That's why the tests
1100  // below throw std::logic_error instead of std::invalid_argument.
1101  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed() && lg == GlobalIndices, std::logic_error,
1102  ": The graph is locally indexed, but Tpetra code is calling "
1103  "this method with lg=GlobalIndices."
1104  << suffix);
1105  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed() && lg == LocalIndices, std::logic_error,
1106  ": The graph is globally indexed, but Tpetra code is calling "
1107  "this method with lg=LocalIndices."
1108  << suffix);
1109  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreAllocated(), std::logic_error,
1110  ": The graph's "
1111  "indices are already allocated, but Tpetra is calling "
1112  "allocateIndices again."
1113  << suffix);
1114  const size_t numRows = this->getLocalNumRows();
1115 
1116  //
1117  // STATIC ALLOCATION PROFILE
1118  //
  // numInds receives the total number of index slots, as returned by the
  // offset-computation helper chosen below.
1119  size_type numInds = 0;
1120  {
1121  if (verbose) {
1122  std::ostringstream os;
1123  os << *prefix << "Allocate k_rowPtrs: " << (numRows + 1) << endl;
1124  std::cerr << os.str();
1125  }
1126  non_const_row_map_type k_rowPtrs("Tpetra::CrsGraph::ptr", numRows + 1);
1127 
1128  if (this->k_numAllocPerRow_.extent(0) != 0) {
1129  // It's OK to throw std::invalid_argument here, because we
1130  // haven't incurred any side effects yet. Throwing that
1131  // exception (and not, say, std::logic_error) implies that the
1132  // instance can recover.
1133  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->k_numAllocPerRow_.extent(0) != numRows,
1134  std::invalid_argument,
1135  "k_numAllocPerRow_ is allocated, that is, "
1136  "has nonzero length "
1137  << this->k_numAllocPerRow_.extent(0)
1138  << ", but its length != numRows = " << numRows << ".");
1139 
1140  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1141  // we want to compute here) lives on device. That's OK;
1142  // computeOffsetsFromCounts can handle this case.
1144 
1145  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1146  // doesn't attempt to check its input for "invalid" flag
1147  // values. For now, we omit that feature of the sequential
1148  // code disabled below.
1149  numInds = computeOffsetsFromCounts(k_rowPtrs, k_numAllocPerRow_);
1150  } else {
1151  // It's OK to throw std::invalid_argument here, because we
1152  // haven't incurred any side effects yet. Throwing that
1153  // exception (and not, say, std::logic_error) implies that the
1154  // instance can recover.
1155  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->numAllocForAllRows_ ==
1156  Tpetra::Details::OrdinalTraits<size_t>::invalid(),
1157  std::invalid_argument,
1158  "numAllocForAllRows_ has an invalid value, "
1159  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = "
1160  << Tpetra::Details::OrdinalTraits<size_t>::invalid() << ".");
1161 
1163  numInds = computeOffsetsFromConstantCount(k_rowPtrs, this->numAllocForAllRows_);
1164  }
1165  // "Commit" the resulting row offsets.
1166  setRowPtrsUnpacked(k_rowPtrs);
1167  }
1168  if (debug_) {
1169  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1170  ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1171  "does not match final entry of rowptrs unpacked");
1172  }
1173 
1174  if (lg == LocalIndices) {
1175  if (verbose) {
1176  std::ostringstream os;
1177  os << *prefix << "Allocate local column indices "
1178  "lclIndsUnpacked_wdv: "
1179  << numInds << endl;
1180  std::cerr << os.str();
1181  }
1182  lclIndsUnpacked_wdv = local_inds_wdv_type(
1183  local_inds_dualv_type("Tpetra::CrsGraph::lclInd", numInds));
1184  } else {
1185  if (verbose) {
1186  std::ostringstream os;
1187  os << *prefix << "Allocate global column indices "
1188  "gblInds_wdv: "
1189  << numInds << endl;
1190  std::cerr << os.str();
1191  }
1192  gblInds_wdv = global_inds_wdv_type(
1193  global_inds_dualv_type("Tpetra::CrsGraph::gblInd", numInds));
1194  }
1195  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1196 
1197  this->indicesAreLocal_ = (lg == LocalIndices);
1198  this->indicesAreGlobal_ = (lg == GlobalIndices);
1199 
1200  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1201  using Kokkos::ViewAllocateWithoutInitializing;
1202  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1203  if (verbose) {
1204  std::ostringstream os;
1205  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1206  << endl;
1207  std::cerr << os.str();
1208  }
1209  num_row_entries_type numRowEnt(ViewAllocateWithoutInitializing(label), numRows);
1210  // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1211  Kokkos::deep_copy(execution_space(), numRowEnt, static_cast<size_t>(0)); // fill w/ 0s
1212  Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1213  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1214  }
1215 
1216  // Once indices are allocated, CrsGraph needs to free this information.
1217  this->numAllocForAllRows_ = 0;
1218  this->k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
1219  this->indicesAreAllocated_ = true;
1220 
  // Validate the new state; each handler re-throws with a message that
  // identifies allocateIndices as the call site.
1221  try {
1222  this->checkInternalState();
1223  } catch (std::logic_error& e) {
1224  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1225  "At end of allocateIndices, "
1226  "checkInternalState threw std::logic_error: "
1227  << e.what());
1228  } catch (std::exception& e) {
1229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1230  "At end of allocateIndices, "
1231  "checkInternalState threw std::exception: "
1232  << e.what());
1233  } catch (...) {
1234  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1235  "At end of allocateIndices, "
1236  "checkInternalState threw an exception "
1237  "not a subclass of std::exception.");
1238  }
1239 
1240  if (verbose) {
1241  std::ostringstream os;
1242  os << *prefix << "Done" << endl;
1243  std::cerr << os.str();
1244  }
1245 }
1246 
// Returns a read-only host subview of lclIndsUnpacked_wdv for one row,
// starting at rowinfo.offset1D and spanning rowinfo.allocSize slots.
// Returns a default-constructed (empty) view when the row has no allocation
// or the index array has zero extent.
1247 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1248 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1249  local_inds_dualv_type::t_host::const_type
1251  getLocalIndsViewHost(const RowInfo& rowinfo) const {
1252  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1253  return typename local_inds_dualv_type::t_host::const_type();
1254  else
1255  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1256  rowinfo.allocSize,
1257  Access::ReadOnly);
1258 }
1259 
// Returns a read-write host subview of lclIndsUnpacked_wdv for one row,
// starting at rowinfo.offset1D and spanning rowinfo.allocSize slots.
// Returns a default-constructed (empty) view when the row has no allocation
// or the index array has zero extent.
// NOTE(review): listing lines 1261, 1263 and 1264 (part of the return type
// and the qualified function name with its parameter list, presumably
// getLocalIndsViewHostNonConst(const RowInfo& rowinfo)) are absent from this
// extract -- confirm.
1260 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1262  local_inds_dualv_type::t_host
1265  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1266  return typename local_inds_dualv_type::t_host();
1267  else
1268  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1269  rowinfo.allocSize,
1270  Access::ReadWrite);
1271 }
1272 
// Returns a read-only host subview of gblInds_wdv for one row, starting at
// rowinfo.offset1D and spanning rowinfo.allocSize slots.
// Returns a default-constructed (empty) view when the row has no allocation
// or the index array has zero extent.
1273 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1275  global_inds_dualv_type::t_host::const_type
1277  getGlobalIndsViewHost(const RowInfo& rowinfo) const {
1278  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1279  return typename global_inds_dualv_type::t_host::const_type();
1280  else
1281  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1282  rowinfo.allocSize,
1283  Access::ReadOnly);
1284 }
1285 
// Returns a read-only device subview of lclIndsUnpacked_wdv for one row,
// starting at rowinfo.offset1D and spanning rowinfo.allocSize slots.
// Returns a default-constructed (empty) view when the row has no allocation
// or the index array has zero extent.
1286 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1288  local_inds_dualv_type::t_dev::const_type
1290  getLocalIndsViewDevice(const RowInfo& rowinfo) const {
1291  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1292  return typename local_inds_dualv_type::t_dev::const_type();
1293  else
1294  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1295  rowinfo.allocSize,
1296  Access::ReadOnly);
1297 }
1298 
// Returns a read-only device subview of gblInds_wdv for one row, starting at
// rowinfo.offset1D and spanning rowinfo.allocSize slots.
// Returns a default-constructed (empty) view when the row has no allocation
// or the index array has zero extent.
1299 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1301  global_inds_dualv_type::t_dev::const_type
1303  getGlobalIndsViewDevice(const RowInfo& rowinfo) const {
1304  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1305  return typename global_inds_dualv_type::t_dev::const_type();
1306  else
1307  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1308  rowinfo.allocSize,
1309  Access::ReadOnly);
1310 }
1311 
// Builds the RowInfo record (localRow, offset1D, allocSize, numEntries) for
// a local row index.
//
// If the row Map is null or myRow is not a local element of it, the result
// has localRow = offset1D = invalid() and allocSize = numEntries = 0.
// If indices are allocated, offset1D and allocSize come from the unpacked
// host row offsets (both 0 when that array is empty), and numEntries is
// k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ is empty.
// Otherwise allocSize is the pending allocation hint, numEntries is 0 and
// offset1D is invalid().
1312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1313 RowInfo
1315  getRowInfo(const LocalOrdinal myRow) const {
1316  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1317  RowInfo ret;
1318  if (this->rowMap_.is_null() || !this->rowMap_->isNodeLocalElement(myRow)) {
1319  ret.localRow = STINV;
1320  ret.allocSize = 0;
1321  ret.numEntries = 0;
1322  ret.offset1D = STINV;
1323  return ret;
1324  }
1325 
1326  ret.localRow = static_cast<size_t>(myRow);
1327  if (this->indicesAreAllocated()) {
1328  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1329  // Offsets tell us the allocation size in this case.
1330  if (rowPtrsUnpacked_host.extent(0) == 0) {
1331  ret.offset1D = 0;
1332  ret.allocSize = 0;
1333  } else {
1334  ret.offset1D = rowPtrsUnpacked_host(myRow);
1335  ret.allocSize = rowPtrsUnpacked_host(myRow + 1) - rowPtrsUnpacked_host(myRow);
1336  }
1337 
  // With no per-row counts stored, the row is treated as full: its entry
  // count equals its allocation size.
1338  ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1339  } else { // haven't performed allocation yet; probably won't hit this code
1340  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1341  // allocate, rather than doing lazy allocation at first insert.
1342  // This will make k_numAllocPerRow_ obsolete.
1343  ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1344  this->numAllocForAllRows_;
1345  ret.numEntries = 0;
1346  ret.offset1D = STINV;
1347  }
1348 
1349  return ret;
1350 }
1351 
// Builds the RowInfo record for a row identified by its global index.
//
// gblRow is translated with rowMap_->getLocalElement. If the row Map is null
// or the translation yields the invalid LocalOrdinal, the result has
// localRow = offset1D = invalid() and allocSize = numEntries = 0.
// After translation the logic mirrors getRowInfo(): allocated graphs read
// the unpacked host row offsets and k_numRowEntries_; unallocated graphs
// report the pending allocation hint with numEntries = 0.
1352 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1353 RowInfo
1355  getRowInfoFromGlobalRowIndex(const GlobalOrdinal gblRow) const {
1356  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1357  RowInfo ret;
1358  if (this->rowMap_.is_null()) {
1359  ret.localRow = STINV;
1360  ret.allocSize = 0;
1361  ret.numEntries = 0;
1362  ret.offset1D = STINV;
1363  return ret;
1364  }
1365  const LocalOrdinal myRow = this->rowMap_->getLocalElement(gblRow);
1366  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
1367  ret.localRow = STINV;
1368  ret.allocSize = 0;
1369  ret.numEntries = 0;
1370  ret.offset1D = STINV;
1371  return ret;
1372  }
1373 
1374  ret.localRow = static_cast<size_t>(myRow);
1375  if (this->indicesAreAllocated()) {
1376  // graph data structures have the info that we need
1377  //
1378  // if static graph, offsets tell us the allocation size
1379  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1380  if (rowPtrsUnpacked_host.extent(0) == 0) {
1381  ret.offset1D = 0;
1382  ret.allocSize = 0;
1383  } else {
1384  ret.offset1D = rowPtrsUnpacked_host(myRow);
1385  ret.allocSize = rowPtrsUnpacked_host(myRow + 1) - rowPtrsUnpacked_host(myRow);
1386  }
1387 
1388  ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1389  } else { // haven't performed allocation yet; probably won't hit this code
1390  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1391  // allocate, rather than doing lazy allocation at first insert.
1392  // This will make k_numAllocPerRow_ obsolete.
1393  ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1394  this->numAllocForAllRows_;
1395  ret.numEntries = 0;
1396  ret.offset1D = STINV;
1397  }
1398 
1399  return ret;
1400 }
1401 
// Verifies the size relationships between LocalOrdinal, GlobalOrdinal,
// size_t and global_size_t that this class relies on.
//
// The sizeof relationships are enforced at compile time with static_assert.
// The comparisons of maximum values are done at run time and throw
// std::runtime_error (with a fixed message) when violated.
1402 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1404  staticAssertions() const {
1405  using Teuchos::OrdinalTraits;
1406  typedef LocalOrdinal LO;
1407  typedef GlobalOrdinal GO;
1408  typedef global_size_t GST;
1409 
1410  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1411  // This is so that we can store local indices in the memory
1412  // formerly occupied by global indices.
1413  static_assert(sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
1414  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1415  // Assumption: max(size_t) >= max(LocalOrdinal)
1416  // This is so that we can represent any LocalOrdinal as a size_t.
1417  static_assert(sizeof(size_t) >= sizeof(LocalOrdinal),
1418  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1419  static_assert(sizeof(GST) >= sizeof(size_t),
1420  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1421 
1422  // FIXME (mfh 30 Sep 2015) We're not using
1423  // Teuchos::CompileTimeAssert any more. Can we do these checks
1424  // with static_assert?
1425 
1426  // can't call max() with CompileTimeAssert, because it isn't a
1427  // constant expression; will need to make this a runtime check
1428  const char msg[] =
1429  "Tpetra::CrsGraph: Object cannot be created with the "
1430  "given template arguments: size assumptions are not valid.";
  // max(LO) must fit in size_t.
1431  TEUCHOS_TEST_FOR_EXCEPTION(
1432  static_cast<size_t>(Teuchos::OrdinalTraits<LO>::max()) > Teuchos::OrdinalTraits<size_t>::max(),
1433  std::runtime_error, msg);
  // max(LO) must not exceed max(GO), compared as global_size_t.
1434  TEUCHOS_TEST_FOR_EXCEPTION(
1435  static_cast<GST>(Teuchos::OrdinalTraits<LO>::max()) > static_cast<GST>(Teuchos::OrdinalTraits<GO>::max()),
1436  std::runtime_error, msg);
  // max(GO) must fit in global_size_t.
1437  TEUCHOS_TEST_FOR_EXCEPTION(
1438  static_cast<size_t>(Teuchos::OrdinalTraits<GO>::max()) > Teuchos::OrdinalTraits<GST>::max(),
1439  std::runtime_error, msg);
  // max(size_t) must fit in global_size_t.
1440  TEUCHOS_TEST_FOR_EXCEPTION(
1441  Teuchos::OrdinalTraits<size_t>::max() > Teuchos::OrdinalTraits<GST>::max(),
1442  std::runtime_error, msg);
1443 }
1444 
// Appends new column indices to the end of the row described by rowinfo.
//
// newInds - holder of the input indices; .ginds is read when lg is
//           GlobalIndices, .linds when lg is LocalIndices.
// lg      - kind of the input indices.
// I       - kind of indices the graph stores.
//
// Global input stored as local is translated with colMap_->getLocalElement.
// Local input with global storage is not implemented and throws
// std::logic_error. After copying, rowinfo.numEntries and
// k_numRowEntries_(rowinfo.localRow) are both increased by the number of new
// indices and the graph is marked locally modified.
// Returns the number of indices appended.
//
// No capacity check is made outside debug mode.
// NOTE(review): the debug-mode checks compare against the size of the whole
// index view, not against rowinfo.offset1D/allocSize -- confirm this is the
// intended bound.
// NOTE(review): listing lines 1447-1448 (qualified function name and the
// rowinfo parameter) are absent from this extract; the name is taken from
// tfecfFuncName.
1445 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1446 size_t
1449  const SLocalGlobalViews& newInds,
1450  const ELocalGlobal lg,
1451  const ELocalGlobal I) {
1452  using Teuchos::ArrayView;
1453  typedef LocalOrdinal LO;
1454  typedef GlobalOrdinal GO;
1455  const char tfecfFuncName[] = "insertIndices: ";
1456 
1457  size_t oldNumEnt = 0;
1458  if (debug_) {
1459  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1460  "lg must be either GlobalIndices or LocalIndices.");
1461  oldNumEnt = this->getNumEntriesInLocalRow(rowinfo.localRow);
1462  }
1463 
1464  size_t numNewInds = 0;
1465  if (lg == GlobalIndices) { // input indices are global
1466  ArrayView<const GO> new_ginds = newInds.ginds;
1467  numNewInds = new_ginds.size();
1468  if (I == GlobalIndices) { // store global indices
1469  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1470  if (debug_) {
1471  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(gind_view.size()) <
1472  rowinfo.numEntries + numNewInds,
1473  std::logic_error,
1474  "gind_view.size() = " << gind_view.size()
1475  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1476  << ") + numNewInds (= " << numNewInds << ").");
1477  }
  // Write position: first unused slot of this row.
1478  GO* const gblColInds_out = gind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1479  for (size_t k = 0; k < numNewInds; ++k) {
1480  gblColInds_out[k] = new_ginds[k];
1481  }
1482  } else if (I == LocalIndices) { // store local indices
1483  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1484  if (debug_) {
1485  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1486  rowinfo.numEntries + numNewInds,
1487  std::logic_error,
1488  "lind_view.size() = " << lind_view.size()
1489  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1490  << ") + numNewInds (= " << numNewInds << ").");
1491  }
1492  LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1493  for (size_t k = 0; k < numNewInds; ++k) {
1494  lclColInds_out[k] = colMap_->getLocalElement(new_ginds[k]);
1495  }
1496  }
1497  } else if (lg == LocalIndices) { // input indices are local
1498  ArrayView<const LO> new_linds = newInds.linds;
1499  numNewInds = new_linds.size();
1500  if (I == LocalIndices) { // store local indices
1501  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1502  if (debug_) {
1503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1504  rowinfo.numEntries + numNewInds,
1505  std::logic_error,
1506  "lind_view.size() = " << lind_view.size()
1507  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1508  << ") + numNewInds (= " << numNewInds << ").");
1509  }
1510  LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1511  for (size_t k = 0; k < numNewInds; ++k) {
1512  lclColInds_out[k] = new_linds[k];
1513  }
1514  } else if (I == GlobalIndices) {
1515  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1516  "The case where the input indices are local "
1517  "and the indices to write are global (lg=LocalIndices, I="
1518  "GlobalIndices) is not implemented, because it does not make sense."
1519  << std::endl
1520  << "If you have correct local column indices, that "
1521  "means the graph has a column Map. In that case, you should be "
1522  "storing local indices.");
1523  }
1524  }
1525 
1526  rowinfo.numEntries += numNewInds;
1527  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1528  this->setLocallyModified();
1529 
1530  if (debug_) {
1531  const size_t chkNewNumEnt =
1532  this->getNumEntriesInLocalRow(rowinfo.localRow);
1533  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1534  "chkNewNumEnt = " << chkNewNumEnt
1535  << " != oldNumEnt (= " << oldNumEnt
1536  << ") + numNewInds (= " << numNewInds << ").");
1537  }
1538 
1539  return numNewInds;
1540 }
1541 
// Convenience overload: looks up the RowInfo for lclRow with getRowInfo()
// and forwards to the RowInfo-based insertGlobalIndicesImpl overload,
// returning its result.
1542 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1543 size_t
1545  insertGlobalIndicesImpl(const LocalOrdinal lclRow,
1546  const GlobalOrdinal inputGblColInds[],
1547  const size_t numInputInds) {
1548  return this->insertGlobalIndicesImpl(this->getRowInfo(lclRow),
1549  inputGblColInds, numInputInds);
1550 }
1551 
// Inserts global column indices into the row described by rowInfo.
//
// inputGblColInds / numInputInds - the indices to insert (host array).
// fun - callback passed through to Details::insertCrsIndices.
//
// The insertion itself is delegated to Details::insertCrsIndices on a
// read-write host view of gblInds_wdv. If that helper returns
// Teuchos::OrdinalTraits<size_t>::invalid(), a diagnostic listing the input
// and current indices is built and std::runtime_error is thrown. Otherwise
// k_numRowEntries_(lclRow) is increased by the returned count, the graph is
// marked locally modified, and the count is returned.
//
// NOTE(review): listing lines 1554-1555, 1559 and 1596 are absent from this
// extract (qualified name with the rowInfo parameter, and the initializer of
// maxNumToPrint); the name is taken from tfecfFuncName.
1552 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1553 size_t
1556  const GlobalOrdinal inputGblColInds[],
1557  const size_t numInputInds,
1558  std::function<void(const size_t, const size_t, const size_t)> fun) {
1560  using Kokkos::MemoryUnmanaged;
1561  using Kokkos::subview;
1562  using Kokkos::View;
1563  using Teuchos::ArrayView;
1564  using LO = LocalOrdinal;
1565  using GO = GlobalOrdinal;
1566  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1567  const LO lclRow = static_cast<LO>(rowInfo.localRow);
1568 
1569  auto numEntries = rowInfo.numEntries;
  // Wrap the caller's raw array in an unmanaged host View (no copy).
1570  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1571  inp_view_type inputInds(inputGblColInds, numInputInds);
1572  size_t numInserted;
1573  {
1574  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1575  numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1576  gblIndsHostView,
1577  numEntries, inputInds, fun);
1578  }
1579 
1580  const bool insertFailed =
1581  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1582  if (insertFailed) {
1583  constexpr size_t ONE(1);
1584  const int myRank = this->getComm()->getRank();
1585  std::ostringstream os;
1586 
1587  os << "Proc " << myRank << ": Not enough capacity to insert "
1588  << numInputInds
1589  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1590  << " into local row " << lclRow << ", which currently has "
1591  << rowInfo.numEntries
1592  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1593  << " and total allocation size " << rowInfo.allocSize
1594  << ". ";
1595  const size_t maxNumToPrint =
1597  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1598  numInputInds);
1599  verbosePrintArray(os, inputGblColIndsView,
1600  "Input global "
1601  "column indices",
1602  maxNumToPrint);
1603  os << ", ";
1604  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1605  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1606  rowInfo.numEntries);
1607  verbosePrintArray(os, curGblColIndsView,
1608  "Current global "
1609  "column indices",
1610  maxNumToPrint);
1611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1612  }
1613 
1614  this->k_numRowEntries_(lclRow) += numInserted;
1615 
1616  this->setLocallyModified();
1617  return numInserted;
1618 }
1619 
// Inserts local column indices into local row myRow.
//
// indices - the local column indices to insert (host ArrayView).
// fun     - callback passed through to Details::insertCrsIndices.
//
// The insertion itself is delegated to Details::insertCrsIndices on a
// read-write host view of lclIndsUnpacked_wdv. If that helper returns
// Teuchos::OrdinalTraits<size_t>::invalid(), std::runtime_error is thrown
// with a capacity diagnostic. Otherwise k_numRowEntries_(myRow) is increased
// by the number inserted and the graph is marked locally modified. In debug
// mode the resulting row count is cross-checked against
// getNumEntriesInLocalRow and std::logic_error is thrown on mismatch.
//
// The exception prefix previously read "insertLocallIndicesImpl: "; it now
// spells the function name correctly.
// NOTE(review): listing line 1621 (presumably the return type and
// `CrsGraph<...>::` qualifier) is absent from this extract.
1620 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1622  insertLocalIndicesImpl(const LocalOrdinal myRow,
1623  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1624  std::function<void(const size_t, const size_t, const size_t)> fun) {
1625  using Kokkos::MemoryUnmanaged;
1626  using Kokkos::subview;
1627  using Kokkos::View;
1628  using LO = LocalOrdinal;
1629  const char tfecfFuncName[] = "insertLocalIndicesImpl: ";
1630 
1631  const RowInfo rowInfo = this->getRowInfo(myRow);
1632 
1633  size_t numNewInds = 0;
1634  size_t newNumEntries = 0;
1635 
1636  auto numEntries = rowInfo.numEntries;
1637  // Note: Teuchos::ArrayViews are in HostSpace
1638  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1639  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1640  size_t numInserted = 0;
1641  {
1642  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1643  numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1644  numEntries, inputInds, fun);
1645  }
1646 
1647  const bool insertFailed =
1648  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1649  if (insertFailed) {
1650  constexpr size_t ONE(1);
1651  const size_t numInputInds(indices.size());
1652  const int myRank = this->getComm()->getRank();
1653  std::ostringstream os;
1654  os << "On MPI Process " << myRank << ": Not enough capacity to "
1655  "insert "
1656  << numInputInds
1657  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1658  << " into local row " << myRow << ", which currently has "
1659  << rowInfo.numEntries
1660  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1661  << " and total allocation size " << rowInfo.allocSize << ".";
1662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1663  }
1664  numNewInds = numInserted;
1665  newNumEntries = rowInfo.numEntries + numNewInds;
1666 
1667  this->k_numRowEntries_(myRow) += numNewInds;
1668  this->setLocallyModified();
1669 
1670  if (debug_) {
1671  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow(myRow);
1672  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries, std::logic_error,
1673  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1674  << " != newNumEntries = " << newNumEntries
1675  << ". Please report this bug to the Tpetra developers.");
1676  }
1677 }
1678 
// Searches the row described by rowInfo for the given global column indices.
//
// indices - global column indices to look for (host ArrayView).
// fun     - callback passed through to Details::findCrsIndices.
//
// For a locally indexed graph the lookup runs against lclIndsUnpacked_wdv,
// translating each global index with the column Map; if the column Map is
// null, Teuchos::OrdinalTraits<size_t>::invalid() is returned. For a
// globally indexed graph the lookup runs against gblInds_wdv. If the graph
// is neither, 0 is returned. Otherwise the value reported by
// Details::findCrsIndices is returned.
//
// NOTE(review): listing lines 1681-1682 (qualified function name and the
// rowInfo parameter, presumably findGlobalIndices(const RowInfo& rowInfo,
// ...)) are absent from this extract -- confirm.
1679 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1680 size_t
1683  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1684  std::function<void(const size_t, const size_t, const size_t)> fun) const {
1685  using GO = GlobalOrdinal;
1686  using Kokkos::MemoryUnmanaged;
1687  using Kokkos::View;
1688  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1689 
1690  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1691  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1692 
1693  size_t numFound = 0;
1694  LocalOrdinal lclRow = rowInfo.localRow;
1695  if (this->isLocallyIndexed()) {
1696  if (this->colMap_.is_null())
1697  return invalidCount;
1698  const auto& colMap = *(this->colMap_);
1699  auto map = [&](GO const gblInd) { return colMap.getLocalElement(gblInd); };
1700  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1701  rowInfo.numEntries,
1702  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1703  } else if (this->isGloballyIndexed()) {
1704  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1705  rowInfo.numEntries,
1706  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1707  }
1708  return numFound;
1709 }
1710 
// Sorts and/or removes duplicate local column indices in one row.
//
// sorted - when false, the row's first rowInfo.numEntries indices are sorted
//          with std::sort.
// merged - when false, adjacent duplicates are removed with std::unique and
//          k_numRowEntries_(rowInfo.localRow) is set to the new count.
//
// Returns the number of duplicates removed; 0 when merged is true, when the
// row is empty, or when rowInfo.numEntries is the invalid sentinel.
//
// NOTE(review): listing lines 1713-1714 (qualified function name and the
// rowInfo parameter, presumably sortAndMergeRowIndices(const RowInfo&
// rowInfo, ...)) are absent from this extract -- confirm.
1711 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1712 size_t
1715  const bool sorted,
1716  const bool merged) {
1717  const size_t origNumEnt = rowInfo.numEntries;
1718  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid() &&
1719  origNumEnt != 0) {
1720  auto lclColInds = this->getLocalIndsViewHostNonConst(rowInfo);
1721 
1722  LocalOrdinal* const lclColIndsRaw = lclColInds.data();
1723  if (!sorted) {
1724  std::sort(lclColIndsRaw, lclColIndsRaw + origNumEnt);
1725  }
1726 
1727  if (!merged) {
1728  LocalOrdinal* const beg = lclColIndsRaw;
1729  LocalOrdinal* const end = beg + rowInfo.numEntries;
1730  LocalOrdinal* const newend = std::unique(beg, end);
1731  const size_t newNumEnt = newend - beg;
1732 
1733  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1734  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1735  return origNumEnt - newNumEnt; // the number of duplicates in the row
1736  } else {
1737  return static_cast<size_t>(0); // assume no duplicates
1738  }
1739  } else {
1740  return static_cast<size_t>(0); // no entries in the row
1741  }
1742 }
1743 
// Installs new domain and range Maps.
// Each Map is compared with the stored one by RCP pointer identity; when it
// differs, the stored Map is replaced and the dependent importer_ (domain)
// or exporter_ (range) is reset to null.
1744 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1746  setDomainRangeMaps(const Teuchos::RCP<const map_type>& domainMap,
1747  const Teuchos::RCP<const map_type>& rangeMap) {
1748  // simple pointer comparison for equality
1749  if (domainMap_ != domainMap) {
1750  domainMap_ = domainMap;
1751  importer_ = Teuchos::null;
1752  }
1753  if (rangeMap_ != rangeMap) {
1754  rangeMap_ = rangeMap;
1755  exporter_ = Teuchos::null;
1756  }
1757 }
1758 
// Mark the cached global constants as not computed: sets globalNumEntries_
// and globalMaxNumRowEntries_ to the global_size_t "invalid" sentinel and
// clears haveGlobalConstants_.
//
// NOTE(review): the signature lines (class qualifier and method name) are
// missing from this listing.
1759 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1762  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1763
1764  globalNumEntries_ = INV;
1765  globalMaxNumRowEntries_ = INV;
1766  haveGlobalConstants_ = false;
1767 }
1768 
1769 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1772  if (debug_) {
1773  using std::endl;
1774  const char tfecfFuncName[] = "checkInternalState: ";
1775  const char suffix[] = " Please report this bug to the Tpetra developers.";
1776 
1777  std::unique_ptr<std::string> prefix;
1778  if (verbose_) {
1779  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1780  std::ostringstream os;
1781  os << *prefix << "Start" << endl;
1782  std::cerr << os.str();
1783  }
1784 
1785  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid();
1786  // const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1787  // check the internal state of this data structure
1788  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1789  // always remains in a valid state
1790 
1791  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->rowMap_.is_null(), std::logic_error,
1792  "Row Map is null." << suffix);
1793  // This may access the row Map, so we need to check first (above)
1794  // whether the row Map is null.
1795  const LocalOrdinal lclNumRows =
1796  static_cast<LocalOrdinal>(this->getLocalNumRows());
1797 
1798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillActive() == this->isFillComplete(), std::logic_error,
1799  "Graph cannot be both fill active and fill complete." << suffix);
1800  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete() &&
1801  (this->colMap_.is_null() ||
1802  this->rangeMap_.is_null() ||
1803  this->domainMap_.is_null()),
1804  std::logic_error,
1805  "Graph is full complete, but at least one of {column, range, domain} "
1806  "Map is null."
1807  << suffix);
1808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() && !this->indicesAreAllocated(),
1809  std::logic_error,
1810  "Storage is optimized, but indices are not "
1811  "allocated, not even trivially."
1812  << suffix);
1813 
1814  size_t nodeAllocSize = 0;
1815  try {
1816  nodeAllocSize = this->getLocalAllocationSize();
1817  } catch (std::logic_error& e) {
1818  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1819  "getLocalAllocationSize threw "
1820  "std::logic_error: "
1821  << e.what());
1822  } catch (std::exception& e) {
1823  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1824  "getLocalAllocationSize threw an "
1825  "std::exception: "
1826  << e.what());
1827  } catch (...) {
1828  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1829  "getLocalAllocationSize threw an exception "
1830  "not a subclass of std::exception.");
1831  }
1832 
1833  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() &&
1834  nodeAllocSize != this->getLocalNumEntries(),
1835  std::logic_error,
1836  "Storage is optimized, but "
1837  "this->getLocalAllocationSize() = "
1838  << nodeAllocSize
1839  << " != this->getLocalNumEntries() = " << this->getLocalNumEntries()
1840  << "." << suffix);
1841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_ &&
1842  (this->globalNumEntries_ != GSTI ||
1843  this->globalMaxNumRowEntries_ != GSTI),
1844  std::logic_error,
1845  "Graph claims not to have global constants, but "
1846  "some of the global constants are not marked as invalid."
1847  << suffix);
1848  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1849  (this->globalNumEntries_ == GSTI ||
1850  this->globalMaxNumRowEntries_ == GSTI),
1851  std::logic_error,
1852  "Graph claims to have global constants, but "
1853  "some of them are marked as invalid."
1854  << suffix);
1855  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1856  (this->globalNumEntries_ < this->getLocalNumEntries() ||
1857  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1858  std::logic_error,
1859  "Graph claims to have global constants, and "
1860  "all of the values of the global constants are valid, but "
1861  "some of the local constants are greater than "
1862  "their corresponding global constants."
1863  << suffix);
1864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1865  (this->numAllocForAllRows_ != 0 ||
1866  this->k_numAllocPerRow_.extent(0) != 0),
1867  std::logic_error,
1868  "The graph claims that its indices are allocated, but "
1869  "either numAllocForAllRows_ (= "
1870  << this->numAllocForAllRows_ << ") is "
1871  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1872  "the graph is supposed to release its \"allocation specifications\" "
1873  "when it allocates its indices."
1874  << suffix);
1875  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1876  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
1877  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
1878  std::logic_error,
1879  "The host and device views of k_rowPtrs_ have "
1880  "different sizes; rowPtrsUnpacked_host_ has size "
1881  << rowPtrsUnpacked_host.extent(0)
1882  << ", but rowPtrsUnpacked_dev_ has size "
1883  << rowPtrsUnpacked_dev.extent(0)
1884  << "." << suffix);
1885  if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
1886  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
1887  std::logic_error,
1888  "The graph is globally indexed and "
1889  "k_rowPtrs has nonzero size "
1890  << rowPtrsUnpacked_host.extent(0)
1891  << ", but that size does not equal lclNumRows+1 = "
1892  << (lclNumRows + 1) << "." << suffix);
1893  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
1894  std::logic_error,
1895  "The graph is globally indexed and "
1896  "k_rowPtrs_ has nonzero size "
1897  << rowPtrsUnpacked_host.extent(0)
1898  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
1899  << rowPtrsUnpacked_host(lclNumRows)
1900  << " != gblInds_wdv.extent(0)="
1901  << gblInds_wdv.extent(0) << "." << suffix);
1902  }
1903  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1904  rowPtrsUnpacked_host.extent(0) != 0 &&
1905  (static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1906  static_cast<size_t>(lclNumRows + 1) ||
1907  rowPtrsUnpacked_host(lclNumRows) !=
1908  static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0))),
1909  std::logic_error,
1910  "If k_rowPtrs_ has nonzero size and "
1911  "the graph is locally indexed, then "
1912  "k_rowPtrs_ must have N+1 rows, and "
1913  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)."
1914  << suffix);
1915 
1916  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1917  nodeAllocSize > 0 &&
1918  this->lclIndsUnpacked_wdv.extent(0) == 0 &&
1919  this->gblInds_wdv.extent(0) == 0,
1920  std::logic_error,
1921  "Graph is allocated nontrivially, but "
1922  "but 1-D allocations are not present."
1923  << suffix);
1924 
1925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->indicesAreAllocated() &&
1926  ((rowPtrsUnpacked_host.extent(0) != 0 ||
1927  this->k_numRowEntries_.extent(0) != 0) ||
1928  this->lclIndsUnpacked_wdv.extent(0) != 0 ||
1929  this->gblInds_wdv.extent(0) != 0),
1930  std::logic_error,
1931  "If indices are not allocated, "
1932  "then none of the buffers should be."
1933  << suffix);
1934  // indices may be local or global only if they are allocated
1935  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
1936  // indicesAreGlobal_)
1937  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
1938  !this->indicesAreAllocated_,
1939  std::logic_error,
1940  "Indices may be local or global only if they are "
1941  "allocated."
1942  << suffix);
1943  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreLocal_ && this->indicesAreGlobal_,
1944  std::logic_error, "Indices may not be both local and global." << suffix);
1945  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && gblInds_wdv.extent(0) != 0,
1946  std::logic_error,
1947  "Indices are local, but "
1948  "gblInds_wdv.extent(0) (= "
1949  << gblInds_wdv.extent(0)
1950  << ") != 0. In other words, if indices are local, then "
1951  "allocations of global indices should not be present."
1952  << suffix);
1953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && lclIndsUnpacked_wdv.extent(0) != 0,
1954  std::logic_error,
1955  "Indices are global, but "
1956  "lclIndsUnpacked_wdv.extent(0) (= "
1957  << lclIndsUnpacked_wdv.extent(0)
1958  << ") != 0. In other words, if indices are global, "
1959  "then allocations for local indices should not be present."
1960  << suffix);
1961  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && nodeAllocSize > 0 &&
1962  lclIndsUnpacked_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1963  std::logic_error,
1964  "Indices are local and "
1965  "getLocalAllocationSize() = "
1966  << nodeAllocSize << " > 0, but "
1967  "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
1968  << getLocalNumRows() << " > 0." << suffix);
1969  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && nodeAllocSize > 0 &&
1970  gblInds_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1971  std::logic_error,
1972  "Indices are global and "
1973  "getLocalAllocationSize() = "
1974  << nodeAllocSize << " > 0, but "
1975  "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
1976  << getLocalNumRows() << " > 0." << suffix);
1977  // check the actual allocations
1978  if (this->indicesAreAllocated() &&
1979  rowPtrsUnpacked_host.extent(0) != 0) {
1980  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1981  this->getLocalNumRows() + 1,
1982  std::logic_error,
1983  "Indices are allocated and "
1984  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
1985  << rowPtrsUnpacked_host.extent(0) << " != getLocalNumRows()+1 = "
1986  << (this->getLocalNumRows() + 1) << "." << suffix);
1987  const size_t actualNumAllocated =
1988  rowPtrsUnpacked_host(this->getLocalNumRows());
1989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1990  static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0)) != actualNumAllocated,
1991  std::logic_error,
1992  "Graph is locally indexed, indices are "
1993  "are allocated, and k_rowPtrs_ has nonzero length, but "
1994  "lclIndsUnpacked_wdv.extent(0) = "
1995  << this->lclIndsUnpacked_wdv.extent(0)
1996  << " != actualNumAllocated = " << actualNumAllocated << suffix);
1997  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed() &&
1998  static_cast<size_t>(this->gblInds_wdv.extent(0)) != actualNumAllocated,
1999  std::logic_error,
2000  "Graph is globally indexed, indices "
2001  "are allocated, and k_rowPtrs_ has nonzero length, but "
2002  "gblInds_wdv.extent(0) = "
2003  << this->gblInds_wdv.extent(0)
2004  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2005  }
2006 
2007  if (verbose_) {
2008  std::ostringstream os;
2009  os << *prefix << "Done" << endl;
2010  std::cerr << os.str();
2011  }
2012  }
2013 }
2014 
// Number of entries currently stored in the row with global index
// `globalRow`.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the
// resulting rowInfo.localRow is the size_t "invalid" sentinel, that
// sentinel is returned; otherwise rowInfo.numEntries is returned.
2015 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2016 size_t
2018  getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const {
2019  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2020  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2021  return Teuchos::OrdinalTraits<size_t>::invalid();
2022  } else {
2023  return rowInfo.numEntries;
2024  }
2025 }
2026 
// Number of entries currently stored in the row with local index
// `localRow`.
//
// The row is looked up with getRowInfo(). If the resulting
// rowInfo.localRow is the size_t "invalid" sentinel, that sentinel is
// returned; otherwise rowInfo.numEntries is returned.
2027 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2028 size_t
2030  getNumEntriesInLocalRow(LocalOrdinal localRow) const {
2031  const RowInfo rowInfo = this->getRowInfo(localRow);
2032  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2033  return Teuchos::OrdinalTraits<size_t>::invalid();
2034  } else {
2035  return rowInfo.numEntries;
2036  }
2037 }
2038 
// Allocated capacity (rowInfo.allocSize) of the row with global index
// `globalRow`.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the
// resulting rowInfo.localRow is the size_t "invalid" sentinel, that
// sentinel is returned instead.
2039 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2040 size_t
2042  getNumAllocatedEntriesInGlobalRow(GlobalOrdinal globalRow) const {
2043  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2044  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2045  return Teuchos::OrdinalTraits<size_t>::invalid();
2046  } else {
2047  return rowInfo.allocSize;
2048  }
2049 }
2050 
// Allocated capacity (rowInfo.allocSize) of the row with local index
// `localRow`.
//
// The row is looked up with getRowInfo(). If the resulting
// rowInfo.localRow is the size_t "invalid" sentinel, that sentinel is
// returned instead.
2051 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2052 size_t
2054  getNumAllocatedEntriesInLocalRow(LocalOrdinal localRow) const {
2055  const RowInfo rowInfo = this->getRowInfo(localRow);
2056  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2057  return Teuchos::OrdinalTraits<size_t>::invalid();
2058  } else {
2059  return rowInfo.allocSize;
2060  }
2061 }
2062 
// Accessor returning the packed row-pointer array as a host view; simply
// forwards to getRowPtrsPackedHost().
// NOTE(review): the class-qualifier / method-name lines are missing from
// this listing.
2063 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2064 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2067  return getRowPtrsPackedHost();
2068 }
2069 
// Accessor returning the packed row-pointer array as a device view; simply
// forwards to getRowPtrsPackedDevice().
// NOTE(review): the class-qualifier / method-name lines are missing from
// this listing.
2070 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2071 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2074  return getRowPtrsPackedDevice();
2075 }
2076 
// Accessor returning a read-only host view of the packed local column
// indices (lclIndsPacked_wdv).
// NOTE(review): the class-qualifier / method-name lines are missing from
// this listing.
2077 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2078 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2081  return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2082 }
2083 
// Accessor returning a read-only device view of the packed local column
// indices (lclIndsPacked_wdv).
// NOTE(review): the return-type, class-qualifier and method-name lines are
// missing from this listing.
2084 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2088  return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2089 }
2090 
// Copy the local column indices of local row `localRow` into `indices`.
//
// localRow   : local index of the row to copy.
// indices    : caller-provided output view; must hold at least as many
//              entries as the row contains.
// numEntries : output; set to the number of entries in the row.
//
// Raises std::runtime_error (via the Teuchos test macros) if the graph is
// globally indexed without a column Map, or if `indices` is too small.
// If the graph is globally indexed (with a column Map), each stored global
// index is converted with colMap_->getLocalElement().
2091 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2093  getLocalRowCopy(LocalOrdinal localRow,
2094  nonconst_local_inds_host_view_type& indices,
2095  size_t& numEntries) const {
2096  using Teuchos::ArrayView;
2097  const char tfecfFuncName[] = "getLocalRowCopy: ";
2098
2099  TEUCHOS_TEST_FOR_EXCEPTION(
2100  isGloballyIndexed() && !hasColMap(), std::runtime_error,
2101  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2102  "does not have a column Map yet. That means we don't have local indices "
2103  "for columns yet, so it doesn't make sense to call this method. If the "
2104  "graph doesn't have a column Map yet, you should call fillComplete on "
2105  "it first.");
2106
2107  // This does the right thing (reports an empty row) if the input
2108  // row is invalid.
2109  const RowInfo rowinfo = this->getRowInfo(localRow);
2110  // No side effects on error.
2111  const size_t theNumEntries = rowinfo.numEntries;
2112  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2113  "Specified storage (size==" << indices.size() << ") does not suffice "
2114  "to hold all "
2115  << theNumEntries << " entry/ies for this row.");
2116  numEntries = theNumEntries;
2117
2118  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2119  if (isLocallyIndexed()) {
2120  auto lclInds = getLocalIndsViewHost(rowinfo);
2121  for (size_t j = 0; j < theNumEntries; ++j) {
2122  indices[j] = lclInds(j);
2123  }
2124  } else if (isGloballyIndexed()) {
      // Stored indices are global; translate through the column Map.
2125  auto gblInds = getGlobalIndsViewHost(rowinfo);
2126  for (size_t j = 0; j < theNumEntries; ++j) {
2127  indices[j] = colMap_->getLocalElement(gblInds(j));
2128  }
2129  }
2130  }
2131 }
2132 
// Copy the global column indices of global row `globalRow` into `indices`.
//
// globalRow  : global index of the row to copy.
// indices    : caller-provided output view; must hold at least as many
//              entries as the row contains.
// numEntries : output; set to the number of entries in the row.
//
// Raises std::runtime_error (via the Teuchos test macro) if `indices` is
// too small. If the graph is locally indexed, each stored local index is
// converted with colMap_->getGlobalElement().
2133 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2135  getGlobalRowCopy(GlobalOrdinal globalRow,
2136  nonconst_global_inds_host_view_type& indices,
2137  size_t& numEntries) const {
2138  using Teuchos::ArrayView;
2139  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2140
2141  // This does the right thing (reports an empty row) if the input
2142  // row is invalid.
2143  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex(globalRow);
2144  const size_t theNumEntries = rowinfo.numEntries;
2145  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2146  static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2147  "Specified storage (size==" << indices.size() << ") does not suffice "
2148  "to hold all "
2149  << theNumEntries << " entry/ies for this row.");
2150  numEntries = theNumEntries; // first side effect
2151
2152  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2153  if (isLocallyIndexed()) {
      // Stored indices are local; translate through the column Map.
2154  auto lclInds = getLocalIndsViewHost(rowinfo);
2155  for (size_t j = 0; j < theNumEntries; ++j) {
2156  indices[j] = colMap_->getGlobalElement(lclInds(j));
2157  }
2158  } else if (isGloballyIndexed()) {
2159  auto gblInds = getGlobalIndsViewHost(rowinfo);
2160  for (size_t j = 0; j < theNumEntries; ++j) {
2161  indices[j] = gblInds(j);
2162  }
2163  }
2164  }
2165 }
2166 
// Set `indices` to a read-only host view of the local column indices of
// local row `localRow`, without copying.
//
// Raises std::runtime_error (via the Teuchos test macro) if the graph is
// globally indexed. For an invalid or empty row, `indices` is set to a
// default-constructed (empty) view. When debug_ is set, the view's size is
// cross-checked against getNumEntriesInLocalRow(localRow).
//
// NOTE(review): the class-qualifier / method-name lines are missing from
// this listing; the name is taken from tfecfFuncName below.
2167 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2170  const LocalOrdinal localRow,
2171  local_inds_host_view_type& indices) const {
2172  const char tfecfFuncName[] = "getLocalRowView: ";
2173
2174  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2175  "The graph's indices are "
2176  "currently stored as global indices, so we cannot return a view with "
2177  "local column indices, whether or not the graph has a column Map. If "
2178  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2179
2180  const RowInfo rowInfo = getRowInfo(localRow);
2181  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2182  rowInfo.numEntries > 0) {
2183  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2184  rowInfo.numEntries,
2185  Access::ReadOnly);
2186  } else {
2187  // This does the right thing (reports an empty row) if the input
2188  // row is invalid.
2189  indices = local_inds_host_view_type();
2190  }
2191
2192  if (debug_) {
2193  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2194  getNumEntriesInLocalRow(localRow),
2195  std::logic_error,
2196  "indices.size() "
2197  "= " << indices.extent(0)
2198  << " != getNumEntriesInLocalRow(localRow=" << localRow << ") = " << getNumEntriesInLocalRow(localRow) << ". Please report this bug to the Tpetra developers.");
2199  }
2200 }
2201 
// Set `indices` to a read-only host view of the global column indices of
// global row `globalRow`, without copying.
//
// Raises std::runtime_error (via the Teuchos test macro) if the graph is
// locally indexed. For an invalid or empty row, `indices` is set to a
// default-constructed (empty) view. When debug_ is set, the view's size is
// cross-checked against getNumEntriesInGlobalRow(globalRow).
//
// NOTE(review): the class-qualifier / method-name lines are missing from
// this listing; the name is taken from tfecfFuncName below.
2202 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2205  const GlobalOrdinal globalRow,
2206  global_inds_host_view_type& indices) const {
2207  const char tfecfFuncName[] = "getGlobalRowView: ";
2208
2209  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed(), std::runtime_error,
2210  "The graph's indices are "
2211  "currently stored as local indices, so we cannot return a view with "
2212  "global column indices. Use getGlobalRowCopy() instead.");
2213
2214  // This does the right thing (reports an empty row) if the input
2215  // row is invalid.
2216  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex(globalRow);
2217  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2218  rowInfo.numEntries > 0) {
2219  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2220  rowInfo.numEntries,
2221  Access::ReadOnly);
2222  } else {
2223  indices = typename global_inds_dualv_type::t_host::const_type();
2224  }
2225  if (debug_) {
2226  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2227  getNumEntriesInGlobalRow(globalRow),
2228  std::logic_error, "indices.size() = " << indices.extent(0) << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = " << getNumEntriesInGlobalRow(globalRow) << ". Please report this bug to the Tpetra developers.");
2229  }
2230 }
2231 
// Insert local column indices into local row `localRow`.
//
// Preconditions, each reported as std::runtime_error through the Teuchos
// test macro: fill is active, the graph is not globally indexed, the graph
// has a column Map, and `localRow` is in the row Map on this process.
// Index storage is allocated (as LocalIndices) on first use. When debug_
// is set, every input index is checked against the column Map and a
// std::invalid_argument listing the offending indices is raised if any is
// absent. The actual insertion is delegated to insertLocalIndicesImpl().
2232 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2234  insertLocalIndices(const LocalOrdinal localRow,
2235  const Teuchos::ArrayView<const LocalOrdinal>& indices) {
2236  const char tfecfFuncName[] = "insertLocalIndices: ";
2237
2238  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error, "Fill must be active.");
2239  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2240  "Graph indices are global; use insertGlobalIndices().");
2241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error,
2242  "Cannot insert local indices without a column Map.");
2243  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!rowMap_->isNodeLocalElement(localRow), std::runtime_error,
2244  "Local row index " << localRow << " is not in the row Map "
2245  "on the calling process.");
2246  if (!indicesAreAllocated()) {
2247  allocateIndices(LocalIndices, verbose_);
2248  }
2249
2250  if (debug_) {
2251  // In debug mode, if the graph has a column Map, test whether any
2252  // of the given column indices are not in the column Map. Keep
2253  // track of the invalid column indices so we can tell the user
2254  // about them.
2255  if (hasColMap()) {
2256  using std::endl;
2257  using Teuchos::Array;
2258  using Teuchos::toString;
2259  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2260
2261  const map_type& colMap = *colMap_;
2262  Array<LocalOrdinal> badColInds;
2263  bool allInColMap = true;
2264  for (size_type k = 0; k < indices.size(); ++k) {
2265  if (!colMap.isNodeLocalElement(indices[k])) {
2266  allInColMap = false;
2267  badColInds.push_back(indices[k]);
2268  }
2269  }
2270  if (!allInColMap) {
2271  std::ostringstream os;
2272  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2273  "entries in owned row "
2274  << localRow << ", at the following column "
2275  "indices: "
2276  << toString(indices) << "." << endl;
2277  os << "Of those, the following indices are not in the column Map on "
2278  "this process: "
2279  << toString(badColInds) << "." << endl
2280  << "Since "
2281  "the graph has a column Map already, it is invalid to insert entries "
2282  "at those locations.";
2283  TEUCHOS_TEST_FOR_EXCEPTION(!allInColMap, std::invalid_argument, os.str());
2284  }
2285  }
2286  }
2287
2288  insertLocalIndicesImpl(localRow, indices);
2289
2290  if (debug_) {
      // Post-condition: after a successful insert the graph must be
      // allocated and locally indexed.
2291  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!indicesAreAllocated() || !isLocallyIndexed(), std::logic_error,
2292  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2293  "! isLocallyIndexed() is true. Please report this bug to the "
2294  "Tpetra developers.");
2295  }
2296 }
2297 
// Raw-array overload of insertLocalIndices: wraps `inds[0 .. numEnt)` in a
// non-owning Teuchos::ArrayView and forwards to the ArrayView overload.
2298 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2300  insertLocalIndices(const LocalOrdinal localRow,
2301  const LocalOrdinal numEnt,
2302  const LocalOrdinal inds[]) {
2303  Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numEnt);
2304  this->insertLocalIndices(localRow, indsT);
2305 }
2306 
2307 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2309  insertGlobalIndices(const GlobalOrdinal gblRow,
2310  const LocalOrdinal numInputInds,
2311  const GlobalOrdinal inputGblColInds[]) {
2312  typedef LocalOrdinal LO;
2313  const char tfecfFuncName[] = "insertGlobalIndices: ";
2314 
2315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2316  "graph indices are local; use insertLocalIndices().");
2317  // This can't really be satisfied for now, because if we are
2318  // fillComplete(), then we are local. In the future, this may
2319  // change. However, the rule that modification require active
2320  // fill will not change.
2321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2322  "You are not allowed to call this method if fill is not active. "
2323  "If fillComplete has been called, you must first call resumeFill "
2324  "before you may insert indices.");
2325  if (!indicesAreAllocated()) {
2326  allocateIndices(GlobalIndices, verbose_);
2327  }
2328  const LO lclRow = this->rowMap_->getLocalElement(gblRow);
2329  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2330  if (debug_) {
2331  if (this->hasColMap()) {
2332  using std::endl;
2333  const map_type& colMap = *(this->colMap_);
2334  // In a debug build, keep track of the nonowned ("bad") column
2335  // indices, so that we can display them in the exception
2336  // message. In a release build, just ditch the loop early if
2337  // we encounter a nonowned column index.
2338  std::vector<GlobalOrdinal> badColInds;
2339  bool allInColMap = true;
2340  for (LO k = 0; k < numInputInds; ++k) {
2341  if (!colMap.isNodeGlobalElement(inputGblColInds[k])) {
2342  allInColMap = false;
2343  badColInds.push_back(inputGblColInds[k]);
2344  }
2345  }
2346  if (!allInColMap) {
2347  std::ostringstream os;
2348  os << "You attempted to insert entries in owned row " << gblRow
2349  << ", at the following column indices: [";
2350  for (LO k = 0; k < numInputInds; ++k) {
2351  os << inputGblColInds[k];
2352  if (k + static_cast<LO>(1) < numInputInds) {
2353  os << ",";
2354  }
2355  }
2356  os << "]." << endl
2357  << "Of those, the following indices are not in "
2358  "the column Map on this process: [";
2359  for (size_t k = 0; k < badColInds.size(); ++k) {
2360  os << badColInds[k];
2361  if (k + size_t(1) < badColInds.size()) {
2362  os << ",";
2363  }
2364  }
2365  os << "]." << endl
2366  << "Since the matrix has a column Map already, "
2367  "it is invalid to insert entries at those locations.";
2368  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, os.str());
2369  }
2370  }
2371  } // debug_
2372  this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2373  } else { // a nonlocal row
2374  this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2375  numInputInds);
2376  }
2377 }
2378 
// ArrayView overload of insertGlobalIndices: forwards the view's size and
// raw pointer to the (gblRow, count, pointer) overload.
2379 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2381  insertGlobalIndices(const GlobalOrdinal gblRow,
2382  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds) {
2383  this->insertGlobalIndices(gblRow, inputGblColInds.size(),
2384  inputGblColInds.getRawPtr());
2385 }
2386 
// Insert global column indices into local row `lclRow`, silently dropping
// any index that is not in the column Map on the calling process.
//
// lclRow        : local index of the target row.
// gblColInds    : global column indices to insert.
// numGblColInds : number of entries in gblColInds.
//
// Raises std::runtime_error (via the Teuchos test macro) if the graph is
// locally indexed or fill is not active. Index storage is allocated (as
// GlobalIndices) on first use. With a column Map, the input is scanned for
// maximal runs of consecutive in-Map indices and each run is passed to
// insertGlobalIndicesImpl(); without one, all indices are inserted.
2387 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2389  insertGlobalIndicesFiltered(const LocalOrdinal lclRow,
2390  const GlobalOrdinal gblColInds[],
2391  const LocalOrdinal numGblColInds) {
2392  typedef LocalOrdinal LO;
2393  typedef GlobalOrdinal GO;
2394  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2395
2396  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2397  "Graph indices are local; use insertLocalIndices().");
2398  // This can't really be satisfied for now, because if we are
2399  // fillComplete(), then we are local. In the future, this may
2400  // change. However, the rule that modification require active
2401  // fill will not change.
2402  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2403  "You are not allowed to call this method if fill is not active. "
2404  "If fillComplete has been called, you must first call resumeFill "
2405  "before you may insert indices.");
2406  if (!indicesAreAllocated()) {
2407  allocateIndices(GlobalIndices, verbose_);
2408  }
2409
2410  Teuchos::ArrayView<const GO> gblColInds_av(gblColInds, numGblColInds);
2411  // If we have a column Map, use it to filter the entries.
2412  if (!colMap_.is_null()) {
2413  const map_type& colMap = *(this->colMap_);
2414
2415  LO curOffset = 0;
2416  while (curOffset < numGblColInds) {
2417  // Find a sequence of input indices that are in the column Map
2418  // on the calling process. Doing a sequence at a time,
2419  // instead of one at a time, amortizes some overhead.
2420  LO endOffset = curOffset;
2421  for (; endOffset < numGblColInds; ++endOffset) {
2422  const LO lclCol = colMap.getLocalElement(gblColInds[endOffset]);
2423  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2424  break; // first entry, in current sequence, not in the column Map
2425  }
2426  }
2427  // curOffset, endOffset: half-exclusive range of indices in
2428  // the column Map on the calling process. If endOffset ==
2429  // curOffset, the range is empty.
2430  const LO numIndInSeq = (endOffset - curOffset);
2431  if (numIndInSeq != 0) {
2432  this->insertGlobalIndicesImpl(lclRow, gblColInds + curOffset,
2433  numIndInSeq);
2434  }
2435  // Invariant before this line: Either endOffset ==
2436  // numGblColInds, or gblColInds[endOffset] is not in the
2437  // column Map on the calling process.
      // Skip past the rejected index (or step beyond the end, which
      // terminates the loop).
2438  curOffset = endOffset + 1;
2439  }
2440  } else {
2441  this->insertGlobalIndicesImpl(lclRow, gblColInds_av.getRawPtr(),
2442  gblColInds_av.size());
2443  }
2444 }
2445 
// Append global column indices for a row not owned by the calling process
// to nonlocals_[gblRow]. Duplicates are not filtered here.
//
// gblRow        : global index of the (non-owned) target row.
// gblColInds    : global column indices to record.
// numGblColInds : number of entries in gblColInds.
2446 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2448  insertGlobalIndicesIntoNonownedRows(const GlobalOrdinal gblRow,
2449  const GlobalOrdinal gblColInds[],
2450  const LocalOrdinal numGblColInds) {
2451  // This creates the std::vector if it doesn't exist yet.
2452  // std::map's operator[] does a lookup each time, so it's better
2453  // to pull nonlocals_[gblRow] out of the loop.
2454  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2455  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2456  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2457  // order to avoid duplicates. globalAssemble() sorts these
2458  // anyway.
2459  nonlocalRow.push_back(gblColInds[k]);
2460  }
2461 }
2462 
// Remove all entries from local row `lrow` by resetting its entry count
// to zero.
//
// Preconditions, each reported as std::runtime_error through the Teuchos
// test macro: fill is active, storage is not optimized, the graph is not
// globally indexed, and `lrow` is in the row Map on this process. Index
// storage is allocated (as LocalIndices) if it was not already. When
// debug_ is set, the post-conditions (row empty, indices allocated,
// locally indexed) are verified.
2463 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2465  removeLocalIndices(LocalOrdinal lrow) {
2466  const char tfecfFuncName[] = "removeLocalIndices: ";
2467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2468  !isFillActive(), std::runtime_error, "requires that fill is active.");
2469  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2470  isStorageOptimized(), std::runtime_error,
2471  "cannot remove indices after optimizeStorage() has been called.");
2472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2473  isGloballyIndexed(), std::runtime_error, "graph indices are global.");
2474  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2475  !rowMap_->isNodeLocalElement(lrow), std::runtime_error,
2476  "Local row " << lrow << " is not in the row Map on the calling process.");
2477  if (!indicesAreAllocated()) {
2478  allocateIndices(LocalIndices, verbose_);
2479  }
2480
      // Only the count is reset, and only if the per-row count array is
      // non-empty; no index storage is touched here.
2481  if (k_numRowEntries_.extent(0) != 0) {
2482  this->k_numRowEntries_(lrow) = 0;
2483  }
2484
2485  if (debug_) {
2486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(getNumEntriesInLocalRow(lrow) != 0 ||
2487  !indicesAreAllocated() ||
2488  !isLocallyIndexed(),
2489  std::logic_error,
2490  "Violated stated post-conditions. Please contact Tpetra team.");
2491  }
2492 }
2493 
// setAllIndices (Kokkos::View overload): install caller-supplied compressed
// sparse row arrays as this graph's local structure.
//
// rowPointers   - row offsets. Must have getLocalNumRows()+1 entries; when the
//                 process has zero local rows, a length of 0 or 1 is accepted.
// columnIndices - local column indices. In debug mode they are verified to lie
//                 in [0, getLocalNumCols()).
//
// Throws std::runtime_error if the graph has no column Map or if rowPointers
// has the wrong length. In debug mode, throws std::invalid_argument if any
// process supplied out-of-range indices or (when isSorted() is true) indices
// that are not sorted within a row; the decision is all-reduced first, so all
// processes throw together.
//
// NOTE(review): the declarator line (return type and class qualifier) is not
// present in this listing.
2494 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2496  setAllIndices(const typename local_graph_device_type::row_map_type& rowPointers,
2497  const typename local_graph_device_type::entries_type::non_const_type& columnIndices) {
2498  using ProfilingRegion = Details::ProfilingRegion;
2499  ProfilingRegion region("Tpetra::CrsGraph::setAllIndices");
2500  const char tfecfFuncName[] = "setAllIndices: ";
2501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2502  !hasColMap() || getColMap().is_null(), std::runtime_error,
2503  "The graph must have a column Map before you may call this method.");
2504  LocalOrdinal numLocalRows = this->getLocalNumRows();
// Validate the length of the row offsets array against the local row count.
2505  {
2506  LocalOrdinal rowPtrLen = rowPointers.size();
2507  if (numLocalRows == 0) {
2508  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2509  rowPtrLen != 0 && rowPtrLen != 1,
2510  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2511  } else {
2512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2513  rowPtrLen != numLocalRows + 1,
2514  std::runtime_error, "rowPointers.size() = " << rowPtrLen << " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2515  }
2516  }
2517
// Debug-only check: count column indices outside [0, numLocalCols) with a
// parallel reduction, then agree globally on whether anyone failed.
2518  if (debug_) {
2519  using exec_space = typename local_graph_device_type::execution_space;
2520  int columnsOutOfBounds = 0;
2521  local_ordinal_type numLocalCols = this->getLocalNumCols();
2522  Kokkos::parallel_reduce(
2523  Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2524  KOKKOS_LAMBDA(const LocalOrdinal i, int& lOutOfBounds) {
2525  if (columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2526  lOutOfBounds++;
2527  },
2528  columnsOutOfBounds);
2529  int globalColsOutOfBounds = 0;
2530  auto comm = this->getComm();
// REDUCE_MAX makes the result nonzero everywhere if any process had a bad
// index, so every process takes the same branch below.
2531  Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2532  Teuchos::outArg(globalColsOutOfBounds));
2533  if (globalColsOutOfBounds) {
2534  std::string message;
2535  if (columnsOutOfBounds) {
2536  // Only print message from ranks with the problem
2537  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2538  }
// gathervPrint is called by every process; ranks without a problem
// contribute an empty message.
2539  Details::gathervPrint(std::cout, message, *comm);
2540  throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2541  }
2542  }
2543
2544  if (debug_ && this->isSorted()) {
2545  // Verify that the local indices are actually sorted
2546  int notSorted = 0;
2547  using exec_space = typename local_graph_device_type::execution_space;
2548  using size_type = typename local_graph_device_type::size_type;
2549  Kokkos::parallel_reduce(
2550  Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2551  KOKKOS_LAMBDA(const LocalOrdinal i, int& lNotSorted) {
2552  size_type rowBegin = rowPointers(i);
2553  size_type rowEnd = rowPointers(i + 1);
// Compare each entry with its predecessor in the same row. Equal
// neighbors pass this test, so duplicates are not reported here.
2554  for (size_type j = rowBegin + 1; j < rowEnd; j++) {
2555  if (columnIndices(j - 1) > columnIndices(j)) {
2556  lNotSorted = 1;
2557  }
2558  }
2559  },
2560  notSorted);
2561  // All-reduce notSorted to avoid rank divergence
2562  int globalNotSorted = 0;
2563  auto comm = this->getComm();
2564  Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, notSorted,
2565  Teuchos::outArg(globalNotSorted));
2566  if (globalNotSorted) {
2567  std::string message;
2568  if (notSorted) {
2569  // Only print message from ranks with the problem
2570  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2571  }
2572  Details::gathervPrint(std::cout, message, *comm);
2573  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2574  }
2575  }
2576
// Adopt the input arrays. The sorted / no-redundancies flags are set
// unconditionally; outside debug mode the input is not checked.
// NOTE(review): noRedundancies_ is set to true although nothing above
// rejects duplicate column indices -- confirm this is a caller obligation.
2577  indicesAreAllocated_ = true;
2578  indicesAreLocal_ = true;
2579  indicesAreSorted_ = true;
2580  noRedundancies_ = true;
// Packed and unpacked index storage refer to the same wrapped view.
2581  lclIndsPacked_wdv = local_inds_wdv_type(columnIndices);
2582  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2583  setRowPtrs(rowPointers);
2584
2585  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2586
2587  // Storage MUST be packed, since the interface doesn't give any
2588  // way to indicate any extra space at the end of each row.
2589  storageStatus_ = Details::STORAGE_1D_PACKED;
2590
2591  // These normally get cleared out at the end of allocateIndices.
2592  // It makes sense to clear them out here, because at the end of
2593  // this method, the graph is allocated on the calling process.
2594  numAllocForAllRows_ = 0;
2595  k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
2596
2597  checkInternalState();
2598 }
2599 
// setAllIndices (Teuchos::ArrayRCP overload): convert host arrays of row
// offsets and local column indices into Kokkos::Views and forward to the
// Kokkos::View overload of setAllIndices.
//
// rowPointers   - host array of row offsets (size_t).
// columnIndices - host array of local column indices.
//
// The row offsets are copied into a freshly allocated row_map view; if size_t
// differs from the row map's value type, the copy goes through copyOffsets,
// which performs the cast. The column indices are deep-copied into a view on
// device_type.
//
// NOTE(review): the declarator line (return type and class qualifier) is not
// present in this listing.
2600 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2602  setAllIndices(const Teuchos::ArrayRCP<size_t>& rowPointers,
2603  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices) {
2604  using Kokkos::View;
2605  typedef typename local_graph_device_type::row_map_type row_map_type;
2606  typedef typename row_map_type::array_layout layout_type;
2607  typedef typename row_map_type::non_const_value_type row_offset_type;
2608  typedef View<size_t*, layout_type, Kokkos::HostSpace,
2609  Kokkos::MemoryUnmanaged>
2610  input_view_type;
2611  typedef typename row_map_type::non_const_type nc_row_map_type;
2612
2613  const size_t size = static_cast<size_t>(rowPointers.size());
2614  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
// Unmanaged host view that wraps the caller's raw pointer (no copy, no
// ownership).
2615  input_view_type ptr_in(rowPointers.getRawPtr(), size);
2616
// Destination for the row offsets, in the graph's own row map type.
2617  nc_row_map_type ptr_rot("Tpetra::CrsGraph::ptr", size);
2618
2619  if constexpr (same) { // size_t == row_offset_type
2620  using lexecution_space = typename device_type::execution_space;
2621  Kokkos::deep_copy(lexecution_space(),
2622  ptr_rot,
2623  ptr_in);
2624  } else { // size_t != row_offset_type
2625  // CudaUvmSpace != HostSpace, so this will be false in that case.
2626  constexpr bool inHostMemory =
2627  std::is_same<typename row_map_type::memory_space,
2628  Kokkos::HostSpace>::value;
2629  if (inHostMemory) {
2630  // Copy (with cast from size_t to row_offset_type, with bounds
2631  // checking if necessary) to ptr_rot.
2632  ::Tpetra::Details::copyOffsets(ptr_rot, ptr_in);
2633  } else { // Copy input row offsets to device first.
2634  //
2635  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2636  // execution space would avoid the double copy.
2637  //
2638  View<size_t*, layout_type, device_type> ptr_st("Tpetra::CrsGraph::ptr", size);
2639
2640  // DEEP_COPY REVIEW - NOT TESTED
2641  Kokkos::deep_copy(ptr_st, ptr_in);
2642  // Copy on device (casting from size_t to row_offset_type,
2643  // with bounds checking if necessary) to ptr_rot. This
2644  // executes in the output View's execution space, which is the
2645  // same as execution_space.
2646  ::Tpetra::Details::copyOffsets(ptr_rot, ptr_st);
2647  }
2648  }
2649
// Deep-copy the column indices into a view on device_type, then hand both
// views to the Kokkos::View overload, which performs all validation.
2650  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2651  Kokkos::Compat::getKokkosViewDeepCopy<device_type>(columnIndices());
2652  setAllIndices(ptr_rot, k_ind);
2653 }
2654 
// globalAssemble: move the entries held in nonlocals_ (the "nonlocal" rows)
// into *this by way of an Export, then clear nonlocals_.
// (The method name is taken from tfecfFuncName below; the declarator lines
// are not present in this listing.)
//
// Steps, as implemented below:
//  - all-reduce to find out whether any process has nonlocal rows; return
//    early if none does;
//  - sort and deduplicate each nonlocal row's column indices in place;
//  - build a Map and a temporary CrsGraph holding the nonlocal rows;
//  - Export with Tpetra::INSERT into *this, either directly (row Map is
//    one-to-one) or through a temporary one-to-one graph followed by an
//    Import;
//  - empty nonlocals_ and call checkInternalState().
//
// Throws std::runtime_error if fill is not active.
// NOTE(review): the body performs collectives (reduceAll, Map construction,
// Export/Import), so it presumably must be called on every process of the
// communicator -- confirm against the class documentation.
2655 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2658  using std::endl;
2659  using Teuchos::Comm;
2660  using Teuchos::outArg;
2661  using Teuchos::RCP;
2662  using Teuchos::rcp;
2663  using Teuchos::REDUCE_MAX;
2664  using Teuchos::REDUCE_MIN;
2665  using Teuchos::reduceAll;
2666  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2667  using LO = local_ordinal_type;
2668  using GO = global_ordinal_type;
2669  using size_type = typename Teuchos::Array<GO>::size_type;
2670  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2671
// prefix is only created (and only dereferenced) when verbose_ is true.
2672  std::unique_ptr<std::string> prefix;
2673  if (verbose_) {
2674  prefix = this->createPrefix("CrsGraph", "globalAssemble");
2675  std::ostringstream os;
2676  os << *prefix << "Start" << endl;
2677  std::cerr << os.str();
2678  }
2679  RCP<const Comm<int>> comm = getComm();
2680
2681  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error,
2682  "Fill must be active before "
2683  "you may call this method.");
2684
2685  const size_t myNumNonlocalRows = this->nonlocals_.size();
2686
2687  // If no processes have nonlocal rows, then we don't have to do
2688  // anything. Checking this is probably cheaper than constructing
2689  // the Map of nonlocal rows (see below) and noticing that it has
2690  // zero global entries.
2691  {
2692  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2693  int someoneHasNonlocalRows = 0;
2694  reduceAll<int, int>(*comm, REDUCE_MAX, iHaveNonlocalRows,
2695  outArg(someoneHasNonlocalRows));
2696  if (someoneHasNonlocalRows == 0) {
2697  if (verbose_) {
2698  std::ostringstream os;
2699  os << *prefix << "Done: No nonlocal rows" << endl;
2700  std::cerr << os.str();
2701  }
2702  return;
2703  } else if (verbose_) {
2704  std::ostringstream os;
2705  os << *prefix << "At least 1 process has nonlocal rows"
2706  << endl;
2707  std::cerr << os.str();
2708  }
2709  }
2710
2711  // 1. Create a list of the "nonlocal" rows on each process. This
2712  // requires iterating over nonlocals_, so while we do this,
2713  // deduplicate the entries and get a count for each nonlocal
2714  // row on this process.
2715  // 2. Construct a new row Map corresponding to those rows. This
2716  // Map is likely overlapping. We know that the Map is not
2717  // empty on all processes, because the above all-reduce and
2718  // return exclude that case.
2719
2720  RCP<const map_type> nonlocalRowMap;
2721  // Keep this for CrsGraph's constructor.
2722  Teuchos::Array<size_t> numEntPerNonlocalRow(myNumNonlocalRows);
2723  {
2724  Teuchos::Array<GO> myNonlocalGblRows(myNumNonlocalRows);
2725  size_type curPos = 0;
2726  for (auto mapIter = this->nonlocals_.begin();
2727  mapIter != this->nonlocals_.end();
2728  ++mapIter, ++curPos) {
2729  myNonlocalGblRows[curPos] = mapIter->first;
2730  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
// sort + unique + erase: leaves each row's column indices sorted with no
// duplicates.
2731  std::sort(gblCols.begin(), gblCols.end());
2732  auto vecLast = std::unique(gblCols.begin(), gblCols.end());
2733  gblCols.erase(vecLast, gblCols.end());
2734  numEntPerNonlocalRow[curPos] = gblCols.size();
2735  }
2736
2737  // Currently, Map requires that its indexBase be the global min
2738  // of all its global indices. Map won't compute this for us, so
2739  // we must do it. If our process has no nonlocal rows, set the
2740  // "min" to the max possible GO value. This ensures that if
2741  // some process has at least one nonlocal row, then it will pick
2742  // that up as the min. We know that at least one process has a
2743  // nonlocal row, since the all-reduce and return at the top of
2744  // this method excluded that case.
2745  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max();
2746  {
2747  auto iter = std::min_element(myNonlocalGblRows.begin(),
2748  myNonlocalGblRows.end());
2749  if (iter != myNonlocalGblRows.end()) {
2750  myMinNonlocalGblRow = *iter;
2751  }
2752  }
2753  GO gblMinNonlocalGblRow = 0;
2754  reduceAll<int, GO>(*comm, REDUCE_MIN, myMinNonlocalGblRow,
2755  outArg(gblMinNonlocalGblRow));
2756  const GO indexBase = gblMinNonlocalGblRow;
// INV is passed as the global element count; presumably this asks Map to
// compute the count itself -- confirm against the Map constructor docs.
2757  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2758  nonlocalRowMap = rcp(new map_type(INV, myNonlocalGblRows(), indexBase, comm));
2759  }
2760
2761  if (verbose_) {
2762  std::ostringstream os;
2763  os << *prefix << "nonlocalRowMap->getIndexBase()="
2764  << nonlocalRowMap->getIndexBase() << endl;
2765  std::cerr << os.str();
2766  }
2767
2768  // 3. Use the column indices for each nonlocal row, as stored in
2769  // nonlocals_, to construct a CrsGraph corresponding to
2770  // nonlocal rows. We need, and already have (from step 1), exact
2771  // counts of the number of entries in each nonlocal row.
2772
2773  RCP<crs_graph_type> nonlocalGraph =
2774  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2775  {
// Same iteration order as step 1, so curPos indexes the matching entry of
// numEntPerNonlocalRow.
2776  size_type curPos = 0;
2777  for (auto mapIter = this->nonlocals_.begin();
2778  mapIter != this->nonlocals_.end();
2779  ++mapIter, ++curPos) {
2780  const GO gblRow = mapIter->first;
2781  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2782  const LO numEnt = static_cast<LO>(numEntPerNonlocalRow[curPos]);
2783  nonlocalGraph->insertGlobalIndices(gblRow, numEnt, gblCols.data());
2784  }
2785  }
2786  if (verbose_) {
2787  std::ostringstream os;
2788  os << *prefix << "Built nonlocal graph" << endl;
2789  std::cerr << os.str();
2790  }
2791  // There's no need to fill-complete the nonlocals graph.
2792  // We just use it as a temporary container for the Export.
2793
2794  // 4. If the original row Map is one to one, then we can Export
2795  // directly from nonlocalGraph into this. Otherwise, we have
2796  // to create a temporary graph with a one-to-one row Map,
2797  // Export into that, then Import from the temporary graph into
2798  // *this.
2799
2800  auto origRowMap = this->getRowMap();
2801  const bool origRowMapIsOneToOne = origRowMap->isOneToOne();
2802
2803  if (origRowMapIsOneToOne) {
2804  if (verbose_) {
2805  std::ostringstream os;
2806  os << *prefix << "Original row Map is 1-to-1" << endl;
2807  std::cerr << os.str();
2808  }
2809  export_type exportToOrig(nonlocalRowMap, origRowMap);
2810  this->doExport(*nonlocalGraph, exportToOrig, Tpetra::INSERT);
2811  // We're done at this point!
2812  } else {
2813  if (verbose_) {
2814  std::ostringstream os;
2815  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
2816  std::cerr << os.str();
2817  }
2818  // If you ask a Map whether it is one to one, it does some
2819  // communication and stashes intermediate results for later use
2820  // by createOneToOne. Thus, calling createOneToOne doesn't cost
2821  // much more than the original cost of calling isOneToOne.
2822  auto oneToOneRowMap = Tpetra::createOneToOne(origRowMap);
2823  export_type exportToOneToOne(nonlocalRowMap, oneToOneRowMap);
2824
2825  // Create a temporary graph with the one-to-one row Map.
2826  //
2827  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
2828  // row, to avoid reallocation during the Export operation.
2829  crs_graph_type oneToOneGraph(oneToOneRowMap, 0);
2830
2831  // Export from graph of nonlocals into the temp one-to-one graph.
2832  if (verbose_) {
2833  std::ostringstream os;
2834  os << *prefix << "Export nonlocal graph" << endl;
2835  std::cerr << os.str();
2836  }
2837  oneToOneGraph.doExport(*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
2838
2839  // We don't need the graph of nonlocals anymore, so get rid of
2840  // it, to keep the memory high-water mark down.
2841  nonlocalGraph = Teuchos::null;
2842
2843  // Import from the one-to-one graph to the original graph.
2844  import_type importToOrig(oneToOneRowMap, origRowMap);
2845  if (verbose_) {
2846  std::ostringstream os;
2847  os << *prefix << "Import nonlocal graph" << endl;
2848  std::cerr << os.str();
2849  }
2850  this->doImport(oneToOneGraph, importToOrig, Tpetra::INSERT);
2851  }
2852
2853  // It's safe now to clear out nonlocals_, since we've already
2854  // committed side effects to *this. The standard idiom for
2855  // clearing a Container like std::map, is to swap it with an empty
2856  // Container and let the swapped Container fall out of scope.
2857  decltype(this->nonlocals_) newNonlocals;
2858  std::swap(this->nonlocals_, newNonlocals);
2859
2860  checkInternalState();
2861  if (verbose_) {
2862  std::ostringstream os;
2863  os << *prefix << "Done" << endl;
2864  std::cerr << os.str();
2865  }
2866 }
2867 
// resumeFill: put the graph back into the "not fill complete" state.
//
// params - optional parameter list; if non-null it is passed to
//          setParameterList().
//
// Clears the cached global constants, marks the indices as sorted and free of
// redundancies, and resets fillComplete_ to false.
// NOTE(review): the declarator line (return type and class qualifier) is not
// present in this listing.
2868 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2870  resumeFill(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2871  clearGlobalConstants();
2872  if (params != Teuchos::null) this->setParameterList(params);
2873  // either still sorted/merged or initially sorted/merged
2874  indicesAreSorted_ = true;
2875  noRedundancies_ = true;
2876  fillComplete_ = false;
2877 }
2878 
// fillComplete (params-only overload): pick the domain and range Maps and
// forward to fillComplete(domainMap, rangeMap, params).
//
// params - optional parameter list, passed through unchanged.
//
// Each of the domain and range Maps defaults to the row Map when the graph
// does not already have one.
// NOTE(review): the declarator line (return type and class qualifier) is not
// present in this listing.
2879 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2881  fillComplete(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2882  // If the graph already has domain and range Maps, don't clobber
2883  // them. If it doesn't, use the current row Map for both the
2884  // domain and range Maps.
2885  //
2886  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
2887  // column Map, and column indices are inserted which are not in
2888  // the row Map on any process, this will cause troubles. However,
2889  // that is not a common case for most applications that we
2890  // encounter, and checking for it might require more
2891  // communication.
2892  Teuchos::RCP<const map_type> domMap = this->getDomainMap();
2893  if (domMap.is_null()) {
2894  domMap = this->getRowMap();
2895  }
2896  Teuchos::RCP<const map_type> ranMap = this->getRangeMap();
2897  if (ranMap.is_null()) {
2898  ranMap = this->getRowMap();
2899  }
2900  this->fillComplete(domMap, ranMap, params);
2901 }
2902 
2903 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2905  fillComplete(const Teuchos::RCP<const map_type>& domainMap,
2906  const Teuchos::RCP<const map_type>& rangeMap,
2907  const Teuchos::RCP<Teuchos::ParameterList>& params) {
2908  using std::endl;
2909  const char tfecfFuncName[] = "fillComplete: ";
2910  const bool verbose = verbose_;
2911 
2912  std::unique_ptr<std::string> prefix;
2913  if (verbose) {
2914  prefix = this->createPrefix("CrsGraph", "fillComplete");
2915  std::ostringstream os;
2916  os << *prefix << "Start" << endl;
2917  std::cerr << os.str();
2918  }
2919 
2920  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive() || isFillComplete(), std::runtime_error,
2921  "Graph fill state must be active (isFillActive() "
2922  "must be true) before calling fillComplete().");
2923 
2924  const int numProcs = getComm()->getSize();
2925 
2926  //
2927  // Read and set parameters
2928  //
2929 
2930  // Does the caller want to sort remote GIDs (within those owned by
2931  // the same process) in makeColMap()?
2932  if (!params.is_null()) {
2933  if (params->isParameter("sort column map ghost gids")) {
2934  sortGhostsAssociatedWithEachProcessor_ =
2935  params->get<bool>("sort column map ghost gids",
2936  sortGhostsAssociatedWithEachProcessor_);
2937  } else if (params->isParameter("Sort column Map ghost GIDs")) {
2938  sortGhostsAssociatedWithEachProcessor_ =
2939  params->get<bool>("Sort column Map ghost GIDs",
2940  sortGhostsAssociatedWithEachProcessor_);
2941  }
2942  }
2943 
2944  // If true, the caller promises that no process did nonlocal
2945  // changes since the last call to fillComplete.
2946  bool assertNoNonlocalInserts = false;
2947  if (!params.is_null()) {
2948  assertNoNonlocalInserts =
2949  params->get<bool>("No Nonlocal Changes", assertNoNonlocalInserts);
2950  }
2951 
2952  //
2953  // Allocate indices, if they haven't already been allocated
2954  //
2955  if (!indicesAreAllocated()) {
2956  if (hasColMap()) {
2957  // We have a column Map, so use local indices.
2958  allocateIndices(LocalIndices, verbose);
2959  } else {
2960  // We don't have a column Map, so use global indices.
2961  allocateIndices(GlobalIndices, verbose);
2962  }
2963  }
2964 
2965  //
2966  // Do global assembly, if requested and if the communicator
2967  // contains more than one process.
2968  //
2969  const bool mayNeedGlobalAssemble = !assertNoNonlocalInserts && numProcs > 1;
2970  if (mayNeedGlobalAssemble) {
2971  // This first checks if we need to do global assembly.
2972  // The check costs a single all-reduce.
2973  globalAssemble();
2974  } else {
2975  const size_t numNonlocals = nonlocals_.size();
2976  if (verbose) {
2977  std::ostringstream os;
2978  os << *prefix << "Do not need to call globalAssemble; "
2979  "assertNoNonlocalInserts="
2980  << (assertNoNonlocalInserts ? "true" : "false")
2981  << "numProcs=" << numProcs
2982  << ", nonlocals_.size()=" << numNonlocals << endl;
2983  std::cerr << os.str();
2984  }
2985  const int lclNeededGlobalAssemble =
2986  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
2987  if (lclNeededGlobalAssemble != 0 && verbose) {
2988  std::ostringstream os;
2989  os << *prefix;
2990  Details::Impl::verbosePrintMap(
2991  os, nonlocals_.begin(), nonlocals_.end(),
2992  nonlocals_.size(), "nonlocals_");
2993  std::cerr << os.str() << endl;
2994  }
2995 
2996  if (debug_) {
2997  auto map = this->getMap();
2998  auto comm = map.is_null() ? Teuchos::null : map->getComm();
2999  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3000  if (!comm.is_null()) {
3001  using Teuchos::REDUCE_MAX;
3002  using Teuchos::reduceAll;
3003  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3004  Teuchos::outArg(gblNeededGlobalAssemble));
3005  }
3006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gblNeededGlobalAssemble != 0, std::runtime_error,
3007  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3008  "least one process in the CrsGraph's communicator. This "
3009  "means either that you incorrectly set the "
3010  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3011  "or that you inserted invalid entries. "
3012  "Rerun with the environment variable TPETRA_VERBOSE="
3013  "CrsGraph set to see the entries of nonlocals_ on every "
3014  "MPI process (WARNING: lots of output).");
3015  } else {
3016  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclNeededGlobalAssemble != 0, std::runtime_error,
3017  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3018  "calling process. This means either that you incorrectly "
3019  "set the \"No Nonlocal Changes\" fillComplete parameter "
3020  "to true, or that you inserted invalid entries. "
3021  "Rerun with the environment "
3022  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3023  "of nonlocals_ on every MPI process (WARNING: lots of "
3024  "output).");
3025  }
3026  }
3027 
3028  // Set domain and range Map. This may clear the Import / Export
3029  // objects if the new Maps differ from any old ones.
3030  setDomainRangeMaps(domainMap, rangeMap);
3031 
3032  // If the graph does not already have a column Map (either from
3033  // the user constructor calling the version of the constructor
3034  // that takes a column Map, or from a previous fillComplete call),
3035  // then create it.
3036  Teuchos::Array<int> remotePIDs(0);
3037  const bool mustBuildColMap = !this->hasColMap();
3038  if (mustBuildColMap) {
3039  this->makeColMap(remotePIDs); // resized on output
3040  }
3041 
3042  // Make indices local, if they aren't already.
3043  // The method doesn't do any work if the indices are already local.
3044  const std::pair<size_t, std::string> makeIndicesLocalResult =
3045  this->makeIndicesLocal(verbose);
3046 
3047  if (debug_) {
3048  using Details::gathervPrint;
3049  using Teuchos::outArg;
3050  using Teuchos::RCP;
3051  using Teuchos::REDUCE_MIN;
3052  using Teuchos::reduceAll;
3053 
3054  RCP<const map_type> map = this->getMap();
3055  RCP<const Teuchos::Comm<int>> comm;
3056  if (!map.is_null()) {
3057  comm = map->getComm();
3058  }
3059  if (comm.is_null()) {
3060  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(makeIndicesLocalResult.first != 0, std::runtime_error,
3061  makeIndicesLocalResult.second);
3062  } else {
3063  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3064  int gblSuccess = 0; // output argument
3065  reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
3066  if (gblSuccess != 1) {
3067  std::ostringstream os;
3068  gathervPrint(os, makeIndicesLocalResult.second, *comm);
3069  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
3070  }
3071  }
3072  } else {
3073  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3074  // the error state to makeImportExport or
3075  // computeGlobalConstants, which may do all-reduces and thus may
3076  // have the opportunity to communicate that error state.
3077  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(makeIndicesLocalResult.first != 0, std::runtime_error,
3078  makeIndicesLocalResult.second);
3079  }
3080 
3081  // If this process has no indices, then CrsGraph considers it
3082  // already trivially sorted and merged. Thus, this method need
3083  // not be called on all processes in the row Map's communicator.
3084  this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3085 
3086  // Make Import and Export objects, if they haven't been made
3087  // already. If we made a column Map above, reuse information from
3088  // that process to avoid communiation in the Import setup.
3089  this->makeImportExport(remotePIDs, mustBuildColMap);
3090 
3091  // Create the KokkosSparse::StaticCrsGraph, if it doesn't already exist.
3092  this->fillLocalGraph(params);
3093 
3094  const bool callComputeGlobalConstants = params.get() == nullptr ||
3095  params->get("compute global constants", true);
3096  if (callComputeGlobalConstants) {
3097  this->computeGlobalConstants();
3098  } else {
3099  this->computeLocalConstants();
3100  }
3101  this->fillComplete_ = true;
3102  this->checkInternalState();
3103 
3104  if (verbose) {
3105  std::ostringstream os;
3106  os << *prefix << "Done" << endl;
3107  std::cerr << os.str();
3108  }
3109 }
3110 
// expertStaticFillComplete: fill-complete a graph whose structure is already
// in final form, skipping the allocation, global assembly, column Map
// construction, index localization and sort/merge steps of fillComplete.
//
// domainMap - domain Map; must be nonnull.
// rangeMap  - range Map; must be nonnull.
// importer  - optional Import; if nonnull, its source Map must match the
//             domain Map and its target Map the column Map.
// exporter  - optional Export; if nonnull, its source Map must match the row
//             Map and its target Map the range Map.
// params    - optional parameter list. "compute global constants" (default
//             true) is read here; "Timer Label" is read only when
//             HAVE_TPETRA_MMM_TIMINGS is defined. params is also forwarded to
//             fillLocalGraph().
//
// Throws std::runtime_error if either Map is null, if the graph is already
// fill complete or has no column Map, or if the row offsets array does not
// have getLocalNumRows()+1 entries. Throws std::invalid_argument if a supplied
// importer or exporter does not match the graph's Maps.
//
// The sorted/merged flags are set without checking the data; the caller is
// trusted (see the comment at the assignment).
// NOTE(review): the importer/exporter messages begin with ": " although
// tfecfFuncName already ends in ": ", and they say "matrix" for a graph.
// NOTE(review): the declarator line (return type and class qualifier) is not
// present in this listing.
3111 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3113  expertStaticFillComplete(const Teuchos::RCP<const map_type>& domainMap,
3114  const Teuchos::RCP<const map_type>& rangeMap,
3115  const Teuchos::RCP<const import_type>& importer,
3116  const Teuchos::RCP<const export_type>& exporter,
3117  const Teuchos::RCP<Teuchos::ParameterList>& params) {
3118  const char tfecfFuncName[] = "expertStaticFillComplete: ";
// With HAVE_TPETRA_MMM_TIMINGS, MM holds the timer of the current phase;
// resetting it to null before each reassignment ends the previous phase.
3119 #ifdef HAVE_TPETRA_MMM_TIMINGS
3120  std::string label;
3121  if (!params.is_null())
3122  label = params->get("Timer Label", label);
3123  std::string prefix = std::string("Tpetra ") + label + std::string(": ");
3124  using Teuchos::TimeMonitor;
3125  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3126 #endif
3127
3128  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3129  domainMap.is_null() || rangeMap.is_null(),
3130  std::runtime_error, "The input domain Map and range Map must be nonnull.");
// Rejects both an already fill-complete graph and a graph without a column
// Map; the message only mentions the latter.
3131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3132  isFillComplete() || !hasColMap(), std::runtime_error,
3133  "You may not "
3134  "call this method unless the graph has a column Map.");
3135  auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedDevice().extent(0);
3136  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3137  getLocalNumRows() > 0 && rowPtrsUnpackedLength == 0,
3138  std::runtime_error, "The calling process has getLocalNumRows() = " << getLocalNumRows() << " > 0 rows, but the row offsets array has not "
3139  "been set.");
3140  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3141  static_cast<size_t>(rowPtrsUnpackedLength) != getLocalNumRows() + 1,
3142  std::runtime_error, "The row offsets array has length " << rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) << ".");
3143
3144  // Note: We don't need to do the following things which are normally done in fillComplete:
3145  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3146
3147  // Constants from allocateIndices
3148  //
3149  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3150  // away once the graph is allocated. expertStaticFillComplete
3151  // either presumes that the graph is allocated, or "allocates" it.
3152  //
3153  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3154  // version of CrsGraph is to allocate in the constructor, not
3155  // lazily on first insert. That will make both
3156  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3157  numAllocForAllRows_ = 0;
3158  k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
3159  indicesAreAllocated_ = true;
3160
3161  // Constants from makeIndicesLocal
3162  //
3163  // The graph has a column Map, so its indices had better be local.
3164  indicesAreLocal_ = true;
3165  indicesAreGlobal_ = false;
3166
3167  // set domain/range map: may clear the import/export objects
3168 #ifdef HAVE_TPETRA_MMM_TIMINGS
3169  MM = Teuchos::null;
3170  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3171 #endif
3172  setDomainRangeMaps(domainMap, rangeMap);
3173
3174  // Presume the user sorted and merged the arrays first
3175  indicesAreSorted_ = true;
3176  noRedundancies_ = true;
3177
3178  // makeImportExport won't create a new importer/exporter if I set one here first.
3179 #ifdef HAVE_TPETRA_MMM_TIMINGS
3180  MM = Teuchos::null;
3181  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3182 #endif
3183
// Drop any existing Import/Export, then adopt the caller's objects after
// validating them against this graph's Maps.
3184  importer_ = Teuchos::null;
3185  exporter_ = Teuchos::null;
3186  if (importer != Teuchos::null) {
3187  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3188  !importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3189  !importer->getTargetMap()->isSameAs(*getColMap()),
3190  std::invalid_argument, ": importer does not match matrix maps.");
3191  importer_ = importer;
3192  }
3193
3194 #ifdef HAVE_TPETRA_MMM_TIMINGS
3195  MM = Teuchos::null;
3196  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3197 #endif
3198
3199  if (exporter != Teuchos::null) {
3200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3201  !exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3202  !exporter->getTargetMap()->isSameAs(*getRangeMap()),
3203  std::invalid_argument, ": exporter does not match matrix maps.");
3204  exporter_ = exporter;
3205  }
3206
3207 #ifdef HAVE_TPETRA_MMM_TIMINGS
3208  MM = Teuchos::null;
3209  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3210 #endif
3211  Teuchos::Array<int> remotePIDs(0); // unused output argument
3212  this->makeImportExport(remotePIDs, false);
3213
3214 #ifdef HAVE_TPETRA_MMM_TIMINGS
3215  MM = Teuchos::null;
3216  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3217 #endif
3218  this->fillLocalGraph(params);
3219
// Global constants are computed unless the caller explicitly sets
// "compute global constants" to false.
3220  const bool callComputeGlobalConstants = params.get() == nullptr ||
3221  params->get("compute global constants", true);
3222
3223  if (callComputeGlobalConstants) {
3224 #ifdef HAVE_TPETRA_MMM_TIMINGS
3225  MM = Teuchos::null;
3226  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3227 #endif // HAVE_TPETRA_MMM_TIMINGS
3228  this->computeGlobalConstants();
3229  } else {
3230 #ifdef HAVE_TPETRA_MMM_TIMINGS
3231  MM = Teuchos::null;
3232  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3233 #endif // HAVE_TPETRA_MMM_TIMINGS
3234  this->computeLocalConstants();
3235  }
3236
3237  fillComplete_ = true;
3238
3239 #ifdef HAVE_TPETRA_MMM_TIMINGS
3240  MM = Teuchos::null;
3241  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3242 #endif
3243  checkInternalState();
3244 }
3245 
3246 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3248  fillLocalGraph(const Teuchos::RCP<Teuchos::ParameterList>& params) {
3250  typedef typename local_graph_device_type::row_map_type row_map_type;
3251  typedef typename row_map_type::non_const_type non_const_row_map_type;
3252  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3253  const char tfecfFuncName[] =
3254  "fillLocalGraph (called from fillComplete or "
3255  "expertStaticFillComplete): ";
3256  const size_t lclNumRows = this->getLocalNumRows();
3257 
3258  // This method's goal is to fill in the two arrays (compressed
3259  // sparse row format) that define the sparse graph's structure.
3260 
3261  bool requestOptimizedStorage = true;
3262  if (!params.is_null() && !params->get("Optimize Storage", true)) {
3263  requestOptimizedStorage = false;
3264  }
3265 
3266  // The graph's column indices are currently stored in a 1-D
3267  // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3268  // in k_lclInds1D_.
3269 
3270  if (debug_) {
3271  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3272  // The graph's array of row offsets must already be allocated.
3273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3274  "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3275  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3276  "rowPtrsUnpacked_host_.extent(0) = "
3277  << rowPtrsUnpacked.extent(0) << " != (lclNumRows + 1) = "
3278  << (lclNumRows + 1) << ".");
3279  const size_t numOffsets = rowPtrsUnpacked.extent(0);
3280  const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3282  lclIndsUnpacked_wdv.extent(0) != valToCheck,
3283  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3284  " and lclIndsUnpacked_wdv.extent(0)="
3285  << lclIndsUnpacked_wdv.extent(0) << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck << ".");
3286  }
3287 
3288  size_t allocSize = 0;
3289  try {
3290  allocSize = this->getLocalAllocationSize();
3291  } catch (std::logic_error& e) {
3292  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
3293  "getLocalAllocationSize threw "
3294  "std::logic_error: "
3295  << e.what());
3296  } catch (std::runtime_error& e) {
3297  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3298  "getLocalAllocationSize threw "
3299  "std::runtime_error: "
3300  << e.what());
3301  } catch (std::exception& e) {
3302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3303  "getLocalAllocationSize threw "
3304  "std::exception: "
3305  << e.what());
3306  } catch (...) {
3307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3308  "getLocalAllocationSize threw "
3309  "an exception not a subclass of std::exception.");
3310  }
3311 
3312  if (this->getLocalNumEntries() != allocSize) {
3313  // Use the nonconst version of row_map_type for ptr_d, because
3314  // the latter is const and we need to modify ptr_d here.
3315  non_const_row_map_type ptr_d;
3316  row_map_type ptr_d_const;
3317 
3318  // The graph's current 1-D storage is "unpacked." This means
3319  // the row offsets may differ from what the final row offsets
3320  // should be. This could happen, for example, if the user set
3321  // an upper bound on the number of entries in each row, but
3322  // didn't fill all those entries.
3323 
3324  if (debug_) {
3325  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3326  if (rowPtrsUnpacked.extent(0) != 0) {
3327  const size_t numOffsets =
3328  static_cast<size_t>(rowPtrsUnpacked.extent(0));
3329  const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3330  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3331  std::logic_error,
3332  "(Unpacked branch) Before allocating "
3333  "or packing, k_rowPtrs_("
3334  << (numOffsets - 1) << ")="
3335  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3336  << lclIndsUnpacked_wdv.extent(0) << ".");
3337  }
3338  }
3339 
3340  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3341  // array of valid entry counts per row (k_numRowEntries_).
3342 
3343  // Total number of entries in the matrix on the calling
3344  // process. We will compute this in the loop below. It's
3345  // cheap to compute and useful as a sanity check.
3346  size_t lclTotalNumEntries = 0;
3347  {
3348  // Allocate the packed row offsets array.
3349  ptr_d =
3350  non_const_row_map_type("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3351  ptr_d_const = ptr_d;
3352 
3353  // It's ok that k_numRowEntries_ is a host View; the
3354  // function can handle this.
3355  typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3356  if (debug_) {
3357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(size_t(numRowEnt_h.extent(0)) != lclNumRows,
3358  std::logic_error,
3359  "(Unpacked branch) "
3360  "numRowEnt_h.extent(0)="
3361  << numRowEnt_h.extent(0)
3362  << " != getLocalNumRows()=" << lclNumRows << "");
3363  }
3364 
3365  lclTotalNumEntries = computeOffsetsFromCounts(ptr_d, numRowEnt_h);
3366 
3367  if (debug_) {
3368  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3369  std::logic_error,
3370  "(Unpacked branch) After allocating "
3371  "ptr_d, ptr_d.extent(0) = "
3372  << ptr_d.extent(0)
3373  << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3374  const auto valToCheck =
3375  ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3376  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3377  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3378  "after filling ptr_d, ptr_d(lclNumRows="
3379  << lclNumRows
3380  << ") = " << valToCheck << " != total number of entries "
3381  "on the calling process = "
3382  << lclTotalNumEntries
3383  << ".");
3384  }
3385  }
3386 
3387  // Allocate the array of packed column indices.
3388  lclinds_1d_type ind_d =
3389  lclinds_1d_type("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3390 
3391  // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3392  // them, using the packed row offsets array ptr_d that we
3393  // created above.
3394  //
3395  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3396  // CrsMatrix?), we need to keep around the unpacked row
3397  // offsets and column indices.
3398 
3399  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3400  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3401  typedef pack_functor<
3402  typename local_graph_device_type::entries_type::non_const_type,
3403  typename local_inds_dualv_type::t_dev::const_type,
3404  row_map_type,
3405  typename local_graph_device_type::row_map_type>
3406  inds_packer_type;
3407  inds_packer_type f(ind_d,
3408  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3409  ptr_d, this->getRowPtrsUnpackedDevice());
3410  {
3411  typedef typename decltype(ind_d)::execution_space exec_space;
3412  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3413  Kokkos::parallel_for(range_type(0, lclNumRows), f);
3414  }
3415 
3416  if (debug_) {
3417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3418  "(\"Optimize Storage\"=true branch) After packing, "
3419  "ptr_d.extent(0)=0.");
3420  if (ptr_d.extent(0) != 0) {
3421  const size_t numOffsets = static_cast<size_t>(ptr_d.extent(0));
3422  const auto valToCheck =
3423  ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3424  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != ind_d.extent(0),
3425  std::logic_error,
3426  "(\"Optimize Storage\"=true branch) "
3427  "After packing, ptr_d("
3428  << (numOffsets - 1) << ")="
3429  << valToCheck << " != ind_d.extent(0)="
3430  << ind_d.extent(0) << ".");
3431  }
3432  }
3433  // Build the local graph.
3434  if (requestOptimizedStorage)
3435  setRowPtrs(ptr_d_const);
3436  else
3437  setRowPtrsPacked(ptr_d_const);
3438  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3439  } else { // We don't have to pack, so just set the pointers.
3440  // Set both packed and unpacked rowptrs to this
3441  this->setRowPtrs(rowPtrsUnpacked_dev_);
3442  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3443 
3444  if (debug_) {
3445  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3446  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3447  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3448  "(\"Optimize Storage\"=false branch) "
3449  "rowPtrsPacked_dev_.extent(0) = 0.");
3450  if (rowPtrsPacked_dev.extent(0) != 0) {
3451  const size_t numOffsets =
3452  static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3453  const size_t valToCheck =
3454  rowPtrsPacked_host(numOffsets - 1);
3455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsPacked_wdv.extent(0)),
3456  std::logic_error,
3457  "(\"Optimize Storage\"=false branch) "
3458  "rowPtrsPacked_dev_("
3459  << (numOffsets - 1) << ")="
3460  << valToCheck
3461  << " != lclIndsPacked_wdv.extent(0)="
3462  << lclIndsPacked_wdv.extent(0) << ".");
3463  }
3464  }
3465  }
3466 
3467  if (debug_) {
3468  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3469  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3471  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3472  if (rowPtrsPacked_dev.extent(0) != 0) {
3473  const size_t numOffsets = static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3474  const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3475  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3476  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) << ".");
3477  }
3478  }
3479 
3480  if (requestOptimizedStorage) {
3481  // With optimized storage, we don't need to store
3482  // the array of row entry counts.
3483 
3484  // Free graph data structures that are only needed for
3485  // unpacked 1-D storage.
3486  k_numRowEntries_ = num_row_entries_type();
3487 
3488  // Keep the new 1-D packed allocations.
3489  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3490 
3491  storageStatus_ = Details::STORAGE_1D_PACKED;
3492  }
3493 
3494  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3495 }
3496 
3497 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3499  replaceColMap(const Teuchos::RCP<const map_type>& newColMap) {
3500  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3501  //
3502  // FIXME (mfh 18 Aug 2014) This will break if the calling process
3503  // has no entries, because in that case, currently it is neither
3504  // locally nor globally indexed. This will change once we get rid
3505  // of lazy allocation (so that the constructor allocates indices
3506  // and therefore commits to local vs. global).
3507  const char tfecfFuncName[] = "replaceColMap: ";
3508  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3509  isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3510  "Requires matching maps and non-static graph.");
3511  colMap_ = newColMap;
3512 }
3513 
3514 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3516  reindexColumns(const Teuchos::RCP<const map_type>& newColMap,
3517  const Teuchos::RCP<const import_type>& newImport,
3518  const bool sortIndicesInEachRow) {
3519  using Teuchos::RCP;
3520  using Teuchos::REDUCE_MIN;
3521  using Teuchos::reduceAll;
3522  typedef GlobalOrdinal GO;
3523  typedef LocalOrdinal LO;
3524  using col_inds_type_dev = typename local_inds_dualv_type::t_dev;
3525  const char tfecfFuncName[] = "reindexColumns: ";
3526 
3527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3528  isFillComplete(), std::runtime_error,
3529  "The graph is fill complete "
3530  "(isFillComplete() returns true). You must call resumeFill() before "
3531  "you may call this method.");
3532 
3533  // mfh 19 Aug 2014: This method does NOT redistribute data; it
3534  // doesn't claim to do the work of an Import or Export. This
3535  // means that for all processes, the calling process MUST own all
3536  // column indices, in both the old column Map (if it exists) and
3537  // the new column Map. We check this via an all-reduce.
3538  //
3539  // Some processes may be globally indexed, others may be locally
3540  // indexed, and others (that have no graph entries) may be
3541  // neither. This method will NOT change the graph's current
3542  // state. If it's locally indexed, it will stay that way, and
3543  // vice versa. It would easy to add an option to convert indices
3544  // from global to local, so as to save a global-to-local
3545  // conversion pass. However, we don't do this here. The intended
3546  // typical use case is that the graph already has a column Map and
3547  // is locally indexed, and this is the case for which we optimize.
3548 
3549  const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3550 
3551  // Attempt to convert indices to the new column Map's version of
3552  // local. This will fail if on the calling process, the graph has
3553  // indices that are not on that process in the new column Map.
3554  // After the local conversion attempt, we will do an all-reduce to
3555  // see if any processes failed.
3556 
3557  // If this is false, then either the graph contains a column index
3558  // which is invalid in the CURRENT column Map, or the graph is
3559  // locally indexed but currently has no column Map. In either
3560  // case, there is no way to convert the current local indices into
3561  // global indices, so that we can convert them into the new column
3562  // Map's local indices. It's possible for this to be true on some
3563  // processes but not others, due to replaceColMap.
3564  bool allCurColIndsValid = true;
3565  // On the calling process, are all valid current column indices
3566  // also in the new column Map on the calling process? In other
3567  // words, does local reindexing suffice, or should the user have
3568  // done an Import or Export instead?
3569  bool localSuffices = true;
3570 
3571  {
3572  // Final arrays for the local indices. We will allocate exactly
3573  // one of these ONLY if the graph is locally indexed on the
3574  // calling process, and ONLY if the graph has one or more entries
3575  // (is not empty) on the calling process. In that case, we
3576  // allocate the first (1-D storage) if the graph has a static
3577  // profile, else we allocate the second (2-D storage).
3578  col_inds_type_dev newLclInds1D_dev;
3579 
3580  // If indices aren't allocated, that means the calling process
3581  // owns no entries in the graph. Thus, there is nothing to
3582  // convert, and it trivially succeeds locally.
3583  if (indicesAreAllocated()) {
3584  if (isLocallyIndexed()) {
3585  if (hasColMap()) { // locally indexed, and currently has a column Map
3586  const map_type& oldColMap = *(getColMap());
3587 
3588  // Allocate storage for the new local indices.
3589  const size_t allocSize = this->getLocalAllocationSize();
3590  auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3591  newLclInds1D_dev = col_inds_type_dev("Tpetra::CrsGraph::lclIndsReindexed",
3592  allocSize);
3593  auto oldLclColMap = oldColMap.getLocalMap();
3594  auto newLclColMap = newColMap->getLocalMap();
3595 
3596  const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3597  const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3598 
3599  const int NOT_ALL_LOCAL_INDICES_ARE_VALID = 1;
3600  const int LOCAL_DOES_NOT_SUFFICE = 2;
3601  int errorStatus = 0;
3602  Kokkos::parallel_reduce(
3603  "Tpetra::CrsGraph::reindexColumns",
3604  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
3605  KOKKOS_LAMBDA(const LocalOrdinal k, int& result) {
3606  const LocalOrdinal oldLclCol = oldLclInds1D(k);
3607  if (oldLclCol == LO_INVALID) {
3608  result &= NOT_ALL_LOCAL_INDICES_ARE_VALID;
3609  } else {
3610  const GO gblCol = oldLclColMap.getGlobalElement(oldLclCol);
3611  if (gblCol == GO_INVALID) {
3612  result &= LOCAL_DOES_NOT_SUFFICE;
3613  } else {
3614  const LocalOrdinal newLclCol = newLclColMap.getLocalElement(gblCol);
3615  if (newLclCol == LO_INVALID) {
3616  result &= NOT_ALL_LOCAL_INDICES_ARE_VALID;
3617  } else {
3618  newLclInds1D_dev(k) = newLclCol;
3619  }
3620  }
3621  }
3622  },
3623  Kokkos::LOr<int>(errorStatus));
3624  allCurColIndsValid = !(errorStatus & NOT_ALL_LOCAL_INDICES_ARE_VALID);
3625  localSuffices = !(errorStatus & LOCAL_DOES_NOT_SUFFICE);
3626  } else { // locally indexed, but no column Map
3627  // This case is only possible if replaceColMap() was called
3628  // with a null argument on the calling process. It's
3629  // possible, but it means that this method can't possibly
3630  // succeed, since we have no way of knowing how to convert
3631  // the current local indices to global indices.
3632  allCurColIndsValid = false;
3633  }
3634  } else { // globally indexed
3635  // If the graph is globally indexed, we don't need to save
3636  // local indices, but we _do_ need to know whether the current
3637  // global indices are valid in the new column Map. We may
3638  // need to do a getRemoteIndexList call to find this out.
3639  //
3640  // In this case, it doesn't matter whether the graph currently
3641  // has a column Map. We don't need the old column Map to
3642  // convert from global indices to the _new_ column Map's local
3643  // indices. Furthermore, we can use the same code, whether
3644  // the graph is static or dynamic profile.
3645 
3646  // Test whether the current global indices are in the new
3647  // column Map on the calling process.
3648  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3649  const RowInfo rowInfo = this->getRowInfo(lclRow);
3650  auto oldGblRowView = this->getGlobalIndsViewHost(rowInfo);
3651  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3652  const GO gblCol = oldGblRowView(k);
3653  if (!newColMap->isNodeGlobalElement(gblCol)) {
3654  localSuffices = false;
3655  break; // Stop at the first invalid index
3656  }
3657  } // for each entry in the current row
3658  } // for each locally owned row
3659  } // locally or globally indexed
3660  } // whether indices are allocated
3661 
3662  // Do an all-reduce to check both possible error conditions.
3663  int lclSuccess[2];
3664  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3665  lclSuccess[1] = localSuffices ? 1 : 0;
3666  int gblSuccess[2];
3667  gblSuccess[0] = 0;
3668  gblSuccess[1] = 0;
3669  RCP<const Teuchos::Comm<int>> comm =
3670  getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3671  if (!comm.is_null()) {
3672  reduceAll<int, int>(*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3673  }
3674 
3675  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3676  gblSuccess[0] == 0, std::runtime_error,
3677  "It is not possible to continue."
3678  " The most likely reason is that the graph is locally indexed, but the "
3679  "column Map is missing (null) on some processes, due to a previous call "
3680  "to replaceColMap().");
3681 
3682  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3683  gblSuccess[1] == 0, std::runtime_error,
3684  "On some process, the graph "
3685  "contains column indices that are in the old column Map, but not in the "
3686  "new column Map (on that process). This method does NOT redistribute "
3687  "data; it does not claim to do the work of an Import or Export operation."
3688  " This means that for all processess, the calling process MUST own all "
3689  "column indices, in both the old column Map and the new column Map. In "
3690  "this case, you will need to do an Import or Export operation to "
3691  "redistribute data.");
3692 
3693  // Commit the results.
3694  if (isLocallyIndexed()) {
3695  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3696  }
3697  // end of scope for newLclInds1D_dev
3698  // sortAndMergeAllIndices needs host access
3699  }
3700 
3701  if (isLocallyIndexed()) {
3702  // We've reindexed, so we don't know if the indices are sorted.
3703  //
3704  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3705  // since we're already going through all the indices above. We
3706  // could also sort each row in place; that way, we would only
3707  // have to make one pass over the rows.
3708  indicesAreSorted_ = false;
3709  if (sortIndicesInEachRow) {
3710  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3711  // order to call this method.
3712  //
3713  // FIXME (mfh 17 Sep 2014) This violates the strong exception
3714  // guarantee. It would be better to sort the new index arrays
3715  // before committing them.
3716  const bool sorted = false; // need to resort
3717  const bool merged = true; // no need to merge, since no dups
3718  this->sortAndMergeAllIndices(sorted, merged);
3719  }
3720  }
3721  colMap_ = newColMap;
3722 
3723  if (newImport.is_null()) {
3724  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3725  // check whether the input Import is null on any process.
3726  //
3727  // If the domain Map hasn't been set yet, we can't compute a new
3728  // Import object. Leave it what it is; it should be null, but
3729  // it doesn't matter. If the domain Map _has_ been set, then
3730  // compute a new Import object if necessary.
3731  if (!domainMap_.is_null()) {
3732  if (!domainMap_->isSameAs(*newColMap)) {
3733  importer_ = Teuchos::rcp(new import_type(domainMap_, newColMap));
3734  } else {
3735  importer_ = Teuchos::null; // don't need an Import
3736  }
3737  }
3738  } else {
3739  // The caller gave us an Import object. Assume that it's valid.
3740  importer_ = newImport;
3741  }
3742 }
3743 
3744 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3746  replaceDomainMap(const Teuchos::RCP<const map_type>& newDomainMap) {
3747  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3748  TEUCHOS_TEST_FOR_EXCEPTION(
3749  colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3750  "this method unless the graph already has a column Map.");
3751  TEUCHOS_TEST_FOR_EXCEPTION(
3752  newDomainMap.is_null(), std::invalid_argument,
3753  prefix << "The new domain Map must be nonnull.");
3754 
3755  // Create a new importer, if needed
3756  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3757  if (newDomainMap != colMap_ && (!newDomainMap->isSameAs(*colMap_))) {
3758  newImporter = rcp(new import_type(newDomainMap, colMap_));
3759  }
3760  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3761 }
3762 
3763 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3765  replaceDomainMapAndImporter(const Teuchos::RCP<const map_type>& newDomainMap,
3766  const Teuchos::RCP<const import_type>& newImporter) {
3767  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
3768  TEUCHOS_TEST_FOR_EXCEPTION(
3769  colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3770  "this method unless the graph already has a column Map.");
3771  TEUCHOS_TEST_FOR_EXCEPTION(
3772  newDomainMap.is_null(), std::invalid_argument,
3773  prefix << "The new domain Map must be nonnull.");
3774 
3775  if (debug_) {
3776  if (newImporter.is_null()) {
3777  // It's not a good idea to put expensive operations in a macro
3778  // clause, even if they are side effect - free, because macros
3779  // don't promise that they won't evaluate their arguments more
3780  // than once. It's polite for them to do so, but not required.
3781  const bool colSameAsDom = colMap_->isSameAs(*newDomainMap);
3782  TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsDom, std::invalid_argument,
3783  "If the new Import is null, "
3784  "then the new domain Map must be the same as the current column Map.");
3785  } else {
3786  const bool colSameAsTgt =
3787  colMap_->isSameAs(*(newImporter->getTargetMap()));
3788  const bool newDomSameAsSrc =
3789  newDomainMap->isSameAs(*(newImporter->getSourceMap()));
3790  TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsTgt || !newDomSameAsSrc, std::invalid_argument,
3791  "If the "
3792  "new Import is nonnull, then the current column Map must be the same "
3793  "as the new Import's target Map, and the new domain Map must be the "
3794  "same as the new Import's source Map.");
3795  }
3796  }
3797 
3798  domainMap_ = newDomainMap;
3799  importer_ = Teuchos::rcp_const_cast<import_type>(newImporter);
3800 }
3801 
3802 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3804  replaceRangeMap(const Teuchos::RCP<const map_type>& newRangeMap) {
3805  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
3806  TEUCHOS_TEST_FOR_EXCEPTION(
3807  rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3808  "this method unless the graph already has a row Map.");
3809  TEUCHOS_TEST_FOR_EXCEPTION(
3810  newRangeMap.is_null(), std::invalid_argument,
3811  prefix << "The new range Map must be nonnull.");
3812 
3813  // Create a new exporter, if needed
3814  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
3815  if (newRangeMap != rowMap_ && (!newRangeMap->isSameAs(*rowMap_))) {
3816  newExporter = rcp(new export_type(rowMap_, newRangeMap));
3817  }
3818  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
3819 }
3820 
3821 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3823  replaceRangeMapAndExporter(const Teuchos::RCP<const map_type>& newRangeMap,
3824  const Teuchos::RCP<const export_type>& newExporter) {
3825  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
3826  TEUCHOS_TEST_FOR_EXCEPTION(
3827  rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3828  "this method unless the graph already has a column Map.");
3829  TEUCHOS_TEST_FOR_EXCEPTION(
3830  newRangeMap.is_null(), std::invalid_argument,
3831  prefix << "The new domain Map must be nonnull.");
3832 
3833  if (debug_) {
3834  if (newExporter.is_null()) {
3835  // It's not a good idea to put expensive operations in a macro
3836  // clause, even if they are side effect - free, because macros
3837  // don't promise that they won't evaluate their arguments more
3838  // than once. It's polite for them to do so, but not required.
3839  const bool rowSameAsRange = rowMap_->isSameAs(*newRangeMap);
3840  TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsRange, std::invalid_argument,
3841  "If the new Export is null, "
3842  "then the new range Map must be the same as the current row Map.");
3843  } else {
3844  const bool newRangeSameAsTgt =
3845  newRangeMap->isSameAs(*(newExporter->getTargetMap()));
3846  const bool rowSameAsSrc =
3847  rowMap_->isSameAs(*(newExporter->getSourceMap()));
3848  TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsSrc || !newRangeSameAsTgt, std::invalid_argument,
3849  "If the "
3850  "new Export is nonnull, then the current row Map must be the same "
3851  "as the new Export's source Map, and the new range Map must be the "
3852  "same as the new Export's target Map.");
3853  }
3854  }
3855 
3856  rangeMap_ = newRangeMap;
3857  exporter_ = Teuchos::rcp_const_cast<export_type>(newExporter);
3858 }
3859 
// Returns a local_graph_device_type built from the device view of the
// packed local column indices and the packed device row offsets.
//
// NOTE(review): the lines carrying the return type, class qualifier and
// function name are missing from this listing.  By analogy with
// getLocalGraphHost below this is presumably getLocalGraphDevice() const
// -- confirm against the original header.
3860 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3864  return local_graph_device_type(
3865  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3866  this->getRowPtrsPackedDevice());
3867 }
3868 
3869 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3872  getLocalGraphHost() const {
3873  return local_graph_host_type(
3874  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3875  this->getRowPtrsPackedHost());
3876 }
3877 
// Computes the graph's global constants: the global number of entries
// (sum over processes of getLocalNumEntries()) and the global maximum
// number of entries in any row (max over processes of
// nodeMaxNumRowEntries_).  computeLocalConstants() is always called first;
// the two all-reduces run only if haveGlobalConstants_ is false, and the
// flag is set to true afterwards.
//
// NOTE(review): the line carrying the return type, class qualifier and
// function name is missing from this listing; the name is taken from the
// profiling-region label below -- confirm against the original header.
3878 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3881  using Teuchos::ArrayView;
3882  using Teuchos::outArg;
3883  using Teuchos::reduceAll;
3884  using ::Tpetra::Details::ProfilingRegion;
3885  typedef global_size_t GST;
3886 
3887  ProfilingRegion regionCGC("Tpetra::CrsGraph::computeGlobalConstants");
3888 
3889  this->computeLocalConstants();
3890 
3891  // Compute global constants from local constants. Processes that
3892  // already have local constants still participate in the
3893  // all-reduces, using their previously computed values.
3894  if (!this->haveGlobalConstants_) {
3895  const Teuchos::Comm<int>& comm = *(this->getComm());
3896  // Promote all the nodeNum* and nodeMaxNum* quantities from
3897  // size_t to global_size_t, when doing the all-reduces for
3898  // globalNum* / globalMaxNum* results.
3899  //
3900  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
3901  // this in two all-reduces (one for the sum and the other for
3902  // the max), or use a custom MPI_Op that combines the sum and
3903  // the max. The latter might even be slower than two
3904  // all-reduces on modern network hardware. It would also be a
3905  // good idea to use nonblocking all-reduces (MPI 3), so that we
3906  // don't have to wait around for the first one to finish before
3907  // starting the second one.
3908  GST lcl, gbl;
3909  lcl = static_cast<GST>(this->getLocalNumEntries());
3910 
// First all-reduce: sum of the local entry counts.
3911  reduceAll<int, GST>(comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
3912  this->globalNumEntries_ = gbl;
3913 
// Second all-reduce: maximum of the per-process maximum row lengths,
// written directly into globalMaxNumRowEntries_.
3914  const GST lclMaxNumRowEnt = static_cast<GST>(this->nodeMaxNumRowEntries_);
3915  reduceAll<int, GST>(comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
3916  outArg(this->globalMaxNumRowEntries_));
3917  this->haveGlobalConstants_ = true;
3918  }
3919 }
3920 
// Computes the graph's local constants on the calling process: currently
// the maximum number of entries in any local row (nodeMaxNumRowEntries_),
// obtained by passing the packed device row offsets to
// ::Tpetra::Details::maxDifference.  Returns immediately if
// haveLocalConstants_ is already true; otherwise sets it to true at the end.
//
// NOTE(review): the line carrying the return type, class qualifier and
// function name is missing from this listing; the name is taken from the
// profiling-region label below -- confirm against the original header.
3921 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3924  using ::Tpetra::Details::ProfilingRegion;
3925 
3926  ProfilingRegion regionCLC("Tpetra::CrsGraph::computeLocalConstants");
3927  if (this->haveLocalConstants_) {
3928  return;
3929  }
3930 
3931  // Reset local properties
3932  this->nodeMaxNumRowEntries_ =
3933  Teuchos::OrdinalTraits<size_t>::invalid();
3934 
3935  using LO = local_ordinal_type;
3936 
3937  auto ptr = this->getRowPtrsPackedDevice();
// An empty offsets array means zero rows; otherwise there is one more
// offset than there are rows.
3938  const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : (static_cast<LO>(ptr.extent(0)) - static_cast<LO>(1));
3939 
3940  const LO lclMaxNumRowEnt =
3941  ::Tpetra::Details::maxDifference("Tpetra::CrsGraph: nodeMaxNumRowEntries",
3942  ptr, lclNumRows);
3943  this->nodeMaxNumRowEntries_ = static_cast<size_t>(lclMaxNumRowEnt);
3944  this->haveLocalConstants_ = true;
3945 }
3946 
3947 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3948 std::pair<size_t, std::string>
3950  makeIndicesLocal(const bool verbose) {
3952  using std::endl;
3953  using Teuchos::arcp;
3954  using Teuchos::Array;
3955  typedef LocalOrdinal LO;
3956  typedef GlobalOrdinal GO;
3957  typedef device_type DT;
3958  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3959  typedef typename num_row_entries_type::non_const_value_type num_ent_type;
3960  const char tfecfFuncName[] = "makeIndicesLocal: ";
3961  ProfilingRegion regionMakeIndicesLocal("Tpetra::CrsGraph::makeIndicesLocal");
3962 
3963  std::unique_ptr<std::string> prefix;
3964  if (verbose) {
3965  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
3966  std::ostringstream os;
3967  os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
3968  std::cerr << os.str();
3969  }
3970 
3971  // These are somewhat global properties, so it's safe to have
3972  // exception checks for them, rather than returning an error code.
3973  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
3974  "The graph does not have a "
3975  "column Map yet. This method should never be called in that case. "
3976  "Please report this bug to the Tpetra developers.");
3977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getColMap().is_null(), std::logic_error,
3978  "The graph claims "
3979  "that it has a column Map, because hasColMap() returns true. However, "
3980  "the result of getColMap() is null. This should never happen. Please "
3981  "report this bug to the Tpetra developers.");
3982 
3983  // Return value 1: The number of column indices (counting
3984  // duplicates) that could not be converted to local indices,
3985  // because they were not in the column Map on the calling process.
3986  size_t lclNumErrs = 0;
3987  std::ostringstream errStrm; // for return value 2 (error string)
3988 
3989  const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3990  const map_type& colMap = *(this->getColMap());
3991 
3992  if (this->isGloballyIndexed() && lclNumRows != 0) {
3993  // This is a host-accessible View.
3994  typename num_row_entries_type::const_type h_numRowEnt =
3995  this->k_numRowEntries_;
3996 
3997  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
3998 
3999  // Allocate space for local indices.
4000  if (rowPtrsUnpacked_host.extent(0) == 0) {
4001  errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4002  "happen here. Please report this bug to the Tpetra developers."
4003  << endl;
4004  // Need to return early.
4005  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
4006  errStrm.str());
4007  }
4008  const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4009 
4010  // mfh 17 Dec 2016: We don't need initial zero-fill of
4011  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4012  // AllowPadding would only help for aligned access (e.g.,
4013  // for vectorization) if we also were to pad each row to the
4014  // same alignment, so we'll skip AllowPadding for now.
4015 
4016  // using Kokkos::AllowPadding;
4017  using Kokkos::view_alloc;
4018  using Kokkos::WithoutInitializing;
4019 
4020  // When giving the label as an argument to
4021  // Kokkos::view_alloc, the label must be a string and not a
4022  // char*, else the code won't compile. This is because
4023  // view_alloc also allows a raw pointer as its first
4024  // argument. See
4025  // https://github.com/kokkos/kokkos/issues/434. This is a
4026  // large allocation typically, so the overhead of creating
4027  // an std::string is minor.
4028  const std::string label("Tpetra::CrsGraph::lclInd");
4029  if (verbose) {
4030  std::ostringstream os;
4031  os << *prefix << "(Re)allocate lclInd_wdv: old="
4032  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4033  std::cerr << os.str();
4034  }
4035 
4036  local_inds_dualv_type lclInds_dualv =
4037  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4038  numEnt);
4039  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4040 
4041  auto lclColMap = colMap.getLocalMap();
4042  // This is a "device mirror" of the host View h_numRowEnt.
4043  //
4044  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4045  // Device instance is to use its default constructor. See the
4046  // following Kokkos issue:
4047  //
4048  // https://github.com/kokkos/kokkos/issues/442
4049  if (verbose) {
4050  std::ostringstream os;
4051  os << *prefix << "Allocate device mirror k_numRowEnt: "
4052  << h_numRowEnt.extent(0) << endl;
4053  std::cerr << os.str();
4054  }
4055  auto k_numRowEnt =
4056  Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
4057 
4059  lclNumErrs =
4060  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type>(
4061  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4062  gblInds_wdv.getDeviceView(Access::ReadOnly),
4063  this->getRowPtrsUnpackedDevice(),
4064  lclColMap,
4065  k_numRowEnt);
4066  if (lclNumErrs != 0) {
4067  const int myRank = [this]() {
4068  auto map = this->getMap();
4069  if (map.is_null()) {
4070  return 0;
4071  } else {
4072  auto comm = map->getComm();
4073  return comm.is_null() ? 0 : comm->getRank();
4074  }
4075  }();
4076  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t>(1));
4077  errStrm << "(Process " << myRank << ") When converting column "
4078  "indices from global to local, we encountered "
4079  << lclNumErrs
4080  << " ind" << (pluralNumErrs ? "ices" : "ex")
4081  << " that do" << (pluralNumErrs ? "es" : "")
4082  << " not live in the column Map on this process." << endl;
4083  }
4084 
4085  // We've converted column indices from global to local, so we
4086  // can deallocate the global column indices (which we know are
4087  // in 1-D storage, because the graph has static profile).
4088  if (verbose) {
4089  std::ostringstream os;
4090  os << *prefix << "Free gblInds_wdv: "
4091  << gblInds_wdv.extent(0) << endl;
4092  std::cerr << os.str();
4093  }
4094  gblInds_wdv = global_inds_wdv_type();
4095  } // globallyIndexed() && lclNumRows > 0
4096 
4097  this->indicesAreLocal_ = true;
4098  this->indicesAreGlobal_ = false;
4099  this->checkInternalState();
4100 
4101  return std::make_pair(lclNumErrs, errStrm.str());
4102 }
4103 
// Build this graph's column Map and store it in colMap_.
//
// The work is delegated to Details::makeColMap, which receives the current
// colMap_ (possibly null), remotePIDs, the domain Map, this graph, and the
// sortGhostsAssociatedWithEachProcessor_ flag.
//
// \param remotePIDs  Passed through unchanged to Details::makeColMap
//   (presumably filled there with the owning ranks of remote column
//   indices -- confirm against Details::makeColMap).
//
// Error handling: only when debug_ is set is the per-process error code
// reduced (REDUCE_MIN) across the communicator; if any process failed, all
// processes' error strings are gathered and std::runtime_error is raised
// via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.  When debug_ is not set the
// error code returned by Details::makeColMap is discarded.
4104 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4106  makeColMap(Teuchos::Array<int>& remotePIDs) {
4108  using std::endl;
4109  const char tfecfFuncName[] = "makeColMap";
4110 
4111  ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::makeColMap");
4112  std::unique_ptr<std::string> prefix;
4113  if (verbose_) {
4114  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4115  std::ostringstream os;
4116  os << *prefix << "Start" << endl;
4117  std::cerr << os.str();
4118  }
4119 
4120  // this->colMap_ should be null at this point, but we accept the
4121  // future possibility that it might not be (esp. if we decide
4122  // later to support graph structure changes after first
4123  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4124  // 2017) support).
4125  Teuchos::RCP<const map_type> colMap = this->colMap_;
4126  const bool sortEachProcsGids =
4127  this->sortGhostsAssociatedWithEachProcessor_;
4128 
4129  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4130  // per-process error code. If an error does occur on a process,
4131  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4132  // notice that error. This is the caller's responsibility. For
4133  // now, we only propagate (to all processes) and report the error
4134  // in debug mode. In the future, we need to add the local/global
4135  // error handling scheme used in BlockCrsMatrix to this class.
4136  if (debug_) {
4137  using Teuchos::outArg;
4138  using Teuchos::REDUCE_MIN;
4139  using Teuchos::reduceAll;
4140 
4141  std::ostringstream errStrm;
4142  const int lclErrCode =
4143  Details::makeColMap(colMap, remotePIDs,
4144  getDomainMap(), *this, sortEachProcsGids, &errStrm);
4145  auto comm = this->getComm();
      // With a null communicator, lclErrCode is never inspected, so a local
      // failure goes unreported even in debug mode.
4146  if (!comm.is_null()) {
      // REDUCE_MIN over 1 (success) / 0 (failure): the result is 1 only if
      // every process succeeded.
4147  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4148  int gblSuccess = 0; // output argument
4149  reduceAll<int, int>(*comm, REDUCE_MIN, lclSuccess,
4150  outArg(gblSuccess));
4151  if (gblSuccess != 1) {
4152  std::ostringstream os;
4153  Details::gathervPrint(os, errStrm.str(), *comm);
4154  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
4155  ": An error happened on at "
4156  "least one process in the CrsGraph's communicator. "
4157  "Here are all processes' error messages:"
4158  << std::endl
4159  << os.str());
4160  }
4161  }
4162  } else {
      // Non-debug path: no error stream is supplied and the returned error
      // code is deliberately discarded.
4163  (void)Details::makeColMap(colMap, remotePIDs,
4164  getDomainMap(), *this, sortEachProcsGids, nullptr);
4165  }
4166  // See above. We want to admit the possibility of makeColMap
4167  // actually revising an existing column Map, even though that
4168  // doesn't currently (as of 10 May 2017) happen.
4169  this->colMap_ = colMap;
4170 
4171  checkInternalState();
4172  if (verbose_) {
4173  std::ostringstream os;
4174  os << *prefix << "Done" << endl;
4175  std::cerr << os.str();
4176  }
4177 }
4178 
// Sort and/or merge the column indices of every local row.
//
// \param sorted  Whether the caller considers the indices already sorted.
// \param merged  Whether the caller considers the indices already merged.
//
// If both flags are true, no per-row work is done and the state flags
// indicesAreSorted_ / noRedundancies_ are left untouched.  Otherwise
// sortAndMergeRowIndices is called for every local row inside a Kokkos
// parallel region over the host execution space, after which both state
// flags are set to true.
//
// Raises std::logic_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
// the graph is globally indexed, or if merged is false while the graph is
// already storage optimized.
4179 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4181  sortAndMergeAllIndices(const bool sorted, const bool merged) {
4182  using std::endl;
4183  using LO = LocalOrdinal;
4184  using host_execution_space =
4185  typename Kokkos::View<LO*, device_type>::host_mirror_type::
4186  execution_space;
4187  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4188  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4189  Details::ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::sortAndMergeAllIndices");
4190 
4191  std::unique_ptr<std::string> prefix;
4192  if (verbose_) {
4193  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4194  std::ostringstream os;
4195  os << *prefix << "Start: "
4196  << "sorted=" << (sorted ? "true" : "false")
4197  << ", merged=" << (merged ? "true" : "false") << endl;
4198  std::cerr << os.str();
4199  }
4200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4201  "This method may only be called after makeIndicesLocal.");
4202  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4203  "The graph is already storage optimized, so we shouldn't be "
4204  "merging any indices. "
4205  "Please report this bug to the Tpetra developers.");
4206 
4207  if (!sorted || !merged) {
4208  const LO lclNumRows(this->getLocalNumRows());
4209  auto range = range_type(0, lclNumRows);
4210 
4211  if (verbose_) {
      // Verbose path: same per-row work as the non-verbose path, but done as
      // a reduction so the summed return values of sortAndMergeRowIndices can
      // be reported as totalNumDups.
      // NOTE(review): unlike the non-verbose branch below, this branch does
      // not call getRowPtrsUnpackedHost() before entering the parallel
      // region -- confirm whether that difference is intentional.
4212  size_t totalNumDups = 0;
4213  // Sync and mark-modified the local indices before disabling WDV tracking
4214  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4216  Kokkos::parallel_reduce(
4217  range,
4218  [this, sorted, merged](const LO lclRow, size_t& numDups) {
4219  const RowInfo rowInfo = this->getRowInfo(lclRow);
4220  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4221  },
4222  totalNumDups);
4224  std::ostringstream os;
4225  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4226  std::cerr << os.str();
4227  } else {
4228  // make sure that host rowptrs have been created before we enter the parallel region
4229  (void)this->getRowPtrsUnpackedHost();
4230  // Sync and mark-modified the local indices before disabling WDV tracking
4231  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4233  Kokkos::parallel_for(range,
4234  [this, sorted, merged](const LO lclRow) {
4235  const RowInfo rowInfo = this->getRowInfo(lclRow);
4236  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4237  });
4239  }
4240  this->indicesAreSorted_ = true; // we just sorted every row
4241  this->noRedundancies_ = true; // we just merged every row
4242  }
4243 
4244  if (verbose_) {
4245  std::ostringstream os;
4246  os << *prefix << "Done" << endl;
4247  std::cerr << os.str();
4248  }
4249 }
4250 
// Create the Import (importer_) and Export (exporter_) objects if they do
// not exist yet and are needed.
//
// \param remotePIDs     Forwarded to the import_type constructor, but only
//                       when useRemotePIDs is true.
// \param useRemotePIDs  Selects the import_type constructor overload that
//                       takes remotePIDs.
//
// importer_ is built from (domainMap_, colMap_) only if it is currently
// null and the two Maps differ.  exporter_ is built from
// (rowMap_, rangeMap_) only if it is currently null and those two Maps
// differ.  If this object's parameter list has an "Import" / "Export"
// sublist, that sublist is passed to the corresponding constructor.
//
// Raises std::logic_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
// the graph has no column Map.
4251 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4253  makeImportExport(Teuchos::Array<int>& remotePIDs,
4254  const bool useRemotePIDs) {
4255  using Teuchos::ParameterList;
4256  using Teuchos::RCP;
4257  using Teuchos::rcp;
4258  using ::Tpetra::Details::ProfilingRegion;
4259  const char tfecfFuncName[] = "makeImportExport: ";
4260  ProfilingRegion regionMIE("Tpetra::CrsGraph::makeImportExport");
4261 
4262  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
4263  "This method may not be called unless the graph has a column Map.");
4264  RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
4265 
4266  // Don't do any checks to see if we need to create the Import, if
4267  // it exists already.
4268  //
4269  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4270  // change CrsGraph in the future to allow changing the column
4271  // Map after fillComplete. For now, the column Map is fixed
4272  // after the first fillComplete call.
4273  if (importer_.is_null()) {
4274  // Create the Import instance if necessary.
      // The RCP inequality test comes first, presumably as a cheap shortcut
      // that avoids the isSameAs comparison when both handles refer to the
      // same Map object -- TODO confirm.
4275  if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
4276  if (params.is_null() || !params->isSublist("Import")) {
4277  if (useRemotePIDs) {
4278  importer_ = rcp(new import_type(domainMap_, colMap_, remotePIDs));
4279  } else {
4280  importer_ = rcp(new import_type(domainMap_, colMap_));
4281  }
4282  } else {
4283  RCP<ParameterList> importSublist = sublist(params, "Import", true);
4284  if (useRemotePIDs) {
4285  RCP<import_type> newImp =
4286  rcp(new import_type(domainMap_, colMap_, remotePIDs,
4287  importSublist));
4288  importer_ = newImp;
4289  } else {
4290  importer_ = rcp(new import_type(domainMap_, colMap_, importSublist));
4291  }
4292  }
4293  }
4294  }
4295 
4296  // Don't do any checks to see if we need to create the Export, if
4297  // it exists already.
4298  if (exporter_.is_null()) {
4299  // Create the Export instance if necessary.
4300  if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4301  if (params.is_null() || !params->isSublist("Export")) {
4302  exporter_ = rcp(new export_type(rowMap_, rangeMap_));
4303  } else {
4304  RCP<ParameterList> exportSublist = sublist(params, "Export", true);
4305  exporter_ = rcp(new export_type(rowMap_, rangeMap_, exportSublist));
4306  }
4307  }
4308  }
4309 }
4310 
// Return a one-line summary of this graph.
//
// The string starts with dist_object_type::description() and is followed
// by a brace-delimited status.  A fill-complete graph reports the global
// number of rows, columns, and entries; otherwise only the global number
// of rows is reported.
4311 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4312 std::string
4314  description() const {
4315  std::ostringstream oss;
4316  oss << dist_object_type::description();
4317  if (isFillComplete()) {
4318  oss << "{status = fill complete"
4319  << ", global rows = " << getGlobalNumRows()
4320  << ", global cols = " << getGlobalNumCols()
4321  << ", global num entries = " << getGlobalNumEntries()
4322  << "}";
4323  } else {
4324  oss << "{status = fill not complete"
4325  << ", global rows = " << getGlobalNumRows()
4326  << "}";
4327  }
4328  return oss.str();
4329 }
4330 
// Print information about this graph to `out`, with the amount of detail
// selected by `verbLevel`.
//
// \param out        Output stream; a Teuchos::OSTab is applied to it for
//                   the duration of the call.
// \param verbLevel  Verbosity level; VERB_DEFAULT is treated as VERB_LOW.
//
// VERB_NONE prints nothing.  Any other level prints description() on rank
// 0, plus the global max number of row entries if the graph is fill
// complete.  VERB_MEDIUM and above also describe the row, column, domain
// and range Maps (those that are non-null) and print per-process entry
// counts.  VERB_HIGH and above print one line per local row; VERB_EXTREME
// appends each row's column indices as global indices.
//
// The per-process sections loop over all ranks and call comm->barrier() on
// every iteration, so every process in the communicator has to make this
// call for those verbosity levels.
//
// NOTE(review): `comm` is dereferenced without a null check -- confirm
// that getComm() cannot return null for a graph on which this is called.
4331 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4333  describe(Teuchos::FancyOStream& out,
4334  const Teuchos::EVerbosityLevel verbLevel) const {
4335  using std::endl;
4336  using std::setw;
4337  using Teuchos::ArrayView;
4338  using Teuchos::Comm;
4339  using Teuchos::RCP;
4340  using Teuchos::VERB_DEFAULT;
4341  using Teuchos::VERB_EXTREME;
4342  using Teuchos::VERB_HIGH;
4343  using Teuchos::VERB_LOW;
4344  using Teuchos::VERB_MEDIUM;
4345  using Teuchos::VERB_NONE;
4346 
4347  Teuchos::EVerbosityLevel vl = verbLevel;
4348  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4349  RCP<const Comm<int>> comm = this->getComm();
4350  const int myImageID = comm->getRank(),
4351  numImages = comm->getSize();
      // Column width for the per-row table: grows by one for each power of
      // ten below the global row count, is clamped to at least 11, and then
      // gets 2 added.
4352  size_t width = 1;
4353  for (size_t dec = 10; dec < getGlobalNumRows(); dec *= 10) {
4354  ++width;
4355  }
4356  width = std::max<size_t>(width, static_cast<size_t>(11)) + 2;
4357  Teuchos::OSTab tab(out);
4358  // none: print nothing
4359  // low: print O(1) info from node 0
4360  // medium: print O(P) info, num entries per node
4361  // high: print O(N) info, num entries per row
4362  // extreme: print O(NNZ) info: print graph indices
4363  //
4364  // for medium and higher, print constituent objects at specified verbLevel
4365  if (vl != VERB_NONE) {
4366  if (myImageID == 0) out << this->description() << std::endl;
4367  // O(1) globals, minus what was already printed by description()
4368  if (isFillComplete() && myImageID == 0) {
4369  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4370  }
4371  // constituent objects
4372  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4373  if (myImageID == 0) out << "\nRow map: " << std::endl;
4374  rowMap_->describe(out, vl);
4375  if (colMap_ != Teuchos::null) {
4376  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4377  colMap_->describe(out, vl);
4378  }
4379  if (domainMap_ != Teuchos::null) {
4380  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4381  domainMap_->describe(out, vl);
4382  }
4383  if (rangeMap_ != Teuchos::null) {
4384  if (myImageID == 0) out << "\nRange map: " << std::endl;
4385  rangeMap_->describe(out, vl);
4386  }
4387  }
4388  // O(P) data
4389  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4390  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4391  if (myImageID == imageCtr) {
4392  out << "Node ID = " << imageCtr << std::endl
4393  << "Node number of entries = " << this->getLocalNumEntries() << std::endl
4394  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4395  if (!indicesAreAllocated()) {
4396  out << "Indices are not allocated." << std::endl;
4397  }
4398  }
      // Three back-to-back barriers per rank; presumably meant to give each
      // rank's output a chance to appear in rank order -- TODO confirm.
4399  comm->barrier();
4400  comm->barrier();
4401  comm->barrier();
4402  }
4403  }
4404  // O(N) and O(NNZ) data
4405  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4406  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4407  if (myImageID == imageCtr) {
4408  out << std::setw(width) << "Node ID"
4409  << std::setw(width) << "Global Row"
4410  << std::setw(width) << "Num Entries";
4411  if (vl == VERB_EXTREME) {
4412  out << " Entries";
4413  }
4414  out << std::endl;
4415  const LocalOrdinal lclNumRows =
4416  static_cast<LocalOrdinal>(this->getLocalNumRows());
4417  for (LocalOrdinal r = 0; r < lclNumRows; ++r) {
4418  const RowInfo rowinfo = this->getRowInfo(r);
4419  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4420  out << std::setw(width) << myImageID
4421  << std::setw(width) << gid
4422  << std::setw(width) << rowinfo.numEntries;
4423  if (vl == VERB_EXTREME) {
4424  out << " ";
      // Entries are always printed as global column indices: stored global
      // indices are printed directly, stored local indices are converted
      // through colMap_.  A graph that is neither globally nor locally
      // indexed prints no entries.
4425  if (isGloballyIndexed()) {
4426  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4427  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4428  GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4429  out << colgid << " ";
4430  }
4431  } else if (isLocallyIndexed()) {
4432  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4433  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4434  LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4435  out << colMap_->getGlobalElement(collid) << " ";
4436  }
4437  }
4438  }
4439  out << std::endl;
4440  }
4441  }
4442  comm->barrier();
4443  comm->barrier();
4444  comm->barrier();
4445  }
4446  }
4447  }
4448 }
4449 
// Compatibility check between this graph and a source object.
//
// The source argument is ignored and the function always returns true; no
// size or type check is actually performed here.
4450 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4452  checkSizes(const SrcDistObject& /* source */) {
4453  // It's not clear what kind of compatibility checks on sizes can
4454  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4455  // compatibility.
4456  return true;
4457 }
4458 
// Copy and permute rows of `source` into this graph.
//
// \param numSameIDs       Number of leading local rows that are copied
//                         from the source row with the same local index.
// \param permuteToLIDs    Local row indices in this (target) graph.
// \param permuteFromLIDs  Local row indices in the source graph; entry i is
//                         inserted into target row permuteToLIDs[i].
// The CombineMode argument is unused.
//
// Steps: (1) compute and apply padding via computeCrsPadding /
// applyCrsPadding; (2) "copy": for each of the first numSameIDs local rows,
// insert the source row's global column indices into the row with the same
// global index; (3) "permute": for each i, insert the source row
// permuteFromLIDs[i] into the target row permuteToLIDs[i].  All insertions
// go through insertGlobalIndices.
//
// Row data is fetched with getGlobalRowCopy when the source is fill
// complete or is not a CrsGraph, and with getGlobalRowView otherwise.
//
// Raises std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
// the two permute arrays differ in length.  The reference dynamic_cast to
// row_graph_type throws std::bad_cast if `source` is not a row_graph_type.
4459 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4462  const size_t numSameIDs,
4463  const Kokkos::DualView<const local_ordinal_type*,
4464  buffer_device_type>& permuteToLIDs,
4465  const Kokkos::DualView<const local_ordinal_type*,
4466  buffer_device_type>& permuteFromLIDs,
4467  const CombineMode /*CM*/) {
4468  using std::endl;
4469  using LO = local_ordinal_type;
4470  using GO = global_ordinal_type;
4471  using this_CRS_type = CrsGraph<LO, GO, node_type>;
4472  const char tfecfFuncName[] = "copyAndPermute: ";
4473  const bool verbose = verbose_;
4474 
4475  std::unique_ptr<std::string> prefix;
4476  if (verbose) {
4477  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4478  std::ostringstream os;
4479  os << *prefix << endl;
4480  std::cerr << os.str();
4481  }
4482 
4483  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
4484  std::runtime_error, "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
4485 
4486  // We know from checkSizes that the source object is a
4487  // row_graph_type, so we don't need to check again.
      // NOTE(review): the checkSizes definition in this file returns true
      // unconditionally, so the dynamic_cast below is the only type check
      // visible here -- confirm the comment above is still accurate.
4488  const row_graph_type& srcRowGraph =
4489  dynamic_cast<const row_graph_type&>(source);
4490 
4491  if (verbose) {
4492  std::ostringstream os;
4493  os << *prefix << "Compute padding" << endl;
4494  std::cerr << os.str();
4495  }
4496  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4497  permuteToLIDs, permuteFromLIDs, verbose);
4498  applyCrsPadding(*padding, verbose);
4499 
4500  // If the source object is actually a CrsGraph, we can use view
4501  // mode instead of copy mode to access the entries in each row,
4502  // if the graph is not fill complete.
4503  const this_CRS_type* srcCrsGraph =
4504  dynamic_cast<const this_CRS_type*>(&source);
4505 
4506  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4507  const map_type& tgtRowMap = *(getRowMap());
4508  const bool src_filled = srcRowGraph.isFillComplete();
4509  nonconst_global_inds_host_view_type row_copy;
4510  LO myid = 0;
4511 
4512  //
4513  // "Copy" part of "copy and permute."
4514  //
4515  if (src_filled || srcCrsGraph == nullptr) {
4516  if (verbose) {
4517  std::ostringstream os;
4518  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4519  std::cerr << os.str();
4520  }
4521  // If the source graph is fill complete, we can't use view mode,
4522  // because the data might be stored in a different format not
4523  // compatible with the expectations of view mode. Also, if the
4524  // source graph is not a CrsGraph, we can't use view mode,
4525  // because RowGraph only provides copy mode access to the data.
4526  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
      // The source row Map's global index is used for both the source lookup
      // and the insertion into this graph.
4527  const GO gid = srcRowMap.getGlobalElement(myid);
4528  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(gid);
4529  Kokkos::resize(row_copy, row_length);
      // check_row_length receives the count from getGlobalRowCopy but is
      // never compared against row_length.
4530  size_t check_row_length = 0;
4531  srcRowGraph.getGlobalRowCopy(gid, row_copy, check_row_length);
4532  this->insertGlobalIndices(gid, row_length, row_copy.data());
4533  }
4534  } else {
4535  if (verbose) {
4536  std::ostringstream os;
4537  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4538  std::cerr << os.str();
4539  }
4540  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4541  const GO gid = srcRowMap.getGlobalElement(myid);
4542  global_inds_host_view_type row;
4543  srcCrsGraph->getGlobalRowView(gid, row);
4544  this->insertGlobalIndices(gid, row.extent(0), row.data());
4545  }
4546  }
4547 
4548  //
4549  // "Permute" part of "copy and permute."
4550  //
4551  auto permuteToLIDs_h = permuteToLIDs.view_host();
4552  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4553 
4554  if (src_filled || srcCrsGraph == nullptr) {
4555  for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4556  const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4557  const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4558  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(srcgid);
4559  Kokkos::resize(row_copy, row_length);
4560  size_t check_row_length = 0;
4561  srcRowGraph.getGlobalRowCopy(srcgid, row_copy, check_row_length);
4562  this->insertGlobalIndices(mygid, row_length, row_copy.data());
4563  }
4564  } else {
4565  for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4566  const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4567  const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4568  global_inds_host_view_type row;
4569  srcCrsGraph->getGlobalRowView(srcgid, row);
4570  this->insertGlobalIndices(mygid, row.extent(0), row.data());
4571  }
4572  }
4573 
4574  if (verbose) {
4575  std::ostringstream os;
4576  os << *prefix << "Done" << endl;
4577  std::cerr << os.str();
4578  }
4579 }
4580 
// Apply a previously computed padding to this graph's unpacked storage.
//
// \param padding  Per-row padding description, forwarded to padCrsArrays.
// \param verbose  Whether to print progress messages to std::cerr.
//
// Steps:
//  1. If indices are not yet allocated, allocate them as GlobalIndices.
//  2. Copy the unpacked row pointers into a mutable view (row_ptrs_beg)
//     and build a matching row_ptrs_end view: beg + k_numRowEntries_ when
//     k_numRowEntries_ is non-empty, otherwise the next row's beginning.
//  3. Call padCrsArrays on gblInds_wdv (globally indexed) or
//     lclIndsUnpacked_wdv (otherwise).
//  4. If k_numRowEntries_ was non-empty, refill it from end - beg.
//  5. Install row_ptrs_beg via setRowPtrsUnpacked.
4581 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4583  applyCrsPadding(const padding_type& padding,
4584  const bool verbose) {
4585  using Details::padCrsArrays;
4587  using std::endl;
4588  using LO = local_ordinal_type;
4589  using row_ptrs_type =
4590  typename local_graph_device_type::row_map_type::non_const_type;
4591  using range_policy =
4592  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4593  const char tfecfFuncName[] = "applyCrsPadding";
4594  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4595 
4596  std::unique_ptr<std::string> prefix;
4597  if (verbose) {
4598  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4599  std::ostringstream os;
4600  os << *prefix << "padding: ";
4601  padding.print(os);
4602  os << endl;
4603  std::cerr << os.str();
4604  }
      // The rank is only looked up in verbose mode; otherwise (or if the Map
      // or communicator is null) -1 is passed on to padCrsArrays.
4605  const int myRank = !verbose ? -1 : [&]() {
4606  auto map = this->getMap();
4607  if (map.is_null()) {
4608  return -1;
4609  }
4610  auto comm = map->getComm();
4611  if (comm.is_null()) {
4612  return -1;
4613  }
4614  return comm->getRank();
4615  }();
4616 
4617  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4618  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4619  // size needs to increase. That should be the job of
4620  // padCrsArrays.
4621 
4622  // Assume global indexing if we don't have any indices yet
4623  if (!indicesAreAllocated()) {
4624  if (verbose) {
4625  std::ostringstream os;
4626  os << *prefix << "Call allocateIndices" << endl;
4627  std::cerr << os.str();
4628  }
4629  allocateIndices(GlobalIndices, verbose);
4630  }
4631  TEUCHOS_ASSERT(indicesAreAllocated());
4632 
4633  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4634  // would use it directly.
4635 
4636  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4637  if (verbose) {
4638  std::ostringstream os;
4639  os << *prefix << "Allocate row_ptrs_beg: "
4640  << rowPtrsUnpacked_dev.extent(0) << endl;
4641  std::cerr << os.str();
4642  }
4643  using Kokkos::view_alloc;
4644  using Kokkos::WithoutInitializing;
4645  row_ptrs_type row_ptrs_beg(
4646  view_alloc("row_ptrs_beg", WithoutInitializing),
4647  rowPtrsUnpacked_dev.extent(0));
4648  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4649  Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
4650 
      // N is the number of rows: one less than the row-pointer length, or 0
      // when the row-pointer array is empty.
4651  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4652  if (verbose) {
4653  std::ostringstream os;
4654  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4655  std::cerr << os.str();
4656  }
4657  row_ptrs_type row_ptrs_end(
4658  view_alloc("row_ptrs_end", WithoutInitializing), N);
4659  row_ptrs_type num_row_entries;
4660 
4661  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4662 
4663  execution_space().fence(); // we need above deep_copy to be done
4664 
      // NOTE(review): range_policy is declared with IndexType<LO>, while the
      // lambdas below take `const size_t i` -- confirm the index-type
      // mismatch is intended.
4665  if (refill_num_row_entries) { // Case 1: Unpacked storage
4666  // We can't assume correct *this capture until C++17, and it's
4667  // likely more efficient just to capture what we need anyway.
4668  num_row_entries =
4669  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4670  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4671  Kokkos::parallel_for(
4672  "Fill end row pointers", range_policy(0, N),
4673  KOKKOS_LAMBDA(const size_t i) {
4674  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4675  });
4676  } else {
4677  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4678  // storage, we don't need row_ptr_end to be separate allocation;
4679  // could just have it alias row_ptr_beg+1.
4680  Kokkos::parallel_for(
4681  "Fill end row pointers", range_policy(0, N),
4682  KOKKOS_LAMBDA(const size_t i) {
4683  row_ptrs_end(i) = row_ptrs_beg(i + 1);
4684  });
4685  }
4686 
4687  if (isGloballyIndexed()) {
4688  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4689  padding, myRank, verbose);
4690  } else {
4691  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4692  padding, myRank, verbose);
4693  }
4694 
4695  if (refill_num_row_entries) {
      // Recompute per-row entry counts from the row pointers as they stand
      // after padCrsArrays, then copy them back into k_numRowEntries_.
4696  Kokkos::parallel_for(
4697  "Fill num entries", range_policy(0, N),
4698  KOKKOS_LAMBDA(const size_t i) {
4699  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4700  });
4701  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4702  }
4703  if (verbose) {
4704  std::ostringstream os;
4705  os << *prefix << "Reassign k_rowPtrs_; old size: "
4706  << rowPtrsUnpacked_dev.extent(0) << ", new size: "
4707  << row_ptrs_beg.extent(0) << endl;
4708  std::cerr << os.str();
      // This size check only runs in verbose mode.
4709  TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
4710  }
4711 
4712  setRowPtrsUnpacked(row_ptrs_beg);
4713 }
4714 
// Compute the padding needed for the "same" and "permute" parts of a
// copy-and-permute from `source` into this graph.
//
// \param source           Source row graph.
// \param numSameIDs       Number of leading rows handled as "same" rows.
// \param permuteToLIDs    Target local row indices of the permuted rows.
// \param permuteFromLIDs  Source local row indices of the permuted rows.
// \param verbose          Controls the start-up message printed by this
//                         function only; the two helpers it calls consult
//                         the verbose_ member instead.
//
// \return A newly allocated padding_type, constructed with this process's
//   rank (-1 if the row Map or its communicator is null), numSameIDs and
//   the number of permutes, then filled by computeCrsPaddingForSameIDs and
//   computeCrsPaddingForPermutedIDs.
4715 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4716 std::unique_ptr<
4717  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4718 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4719  computeCrsPadding(
4720  const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4721  const size_t numSameIDs,
4722  const Kokkos::DualView<const local_ordinal_type*,
4723  buffer_device_type>& permuteToLIDs,
4724  const Kokkos::DualView<const local_ordinal_type*,
4725  buffer_device_type>& permuteFromLIDs,
4726  const bool verbose) const {
4727  using LO = local_ordinal_type;
4728  using std::endl;
4729 
4730  std::unique_ptr<std::string> prefix;
4731  if (verbose) {
4732  prefix = this->createPrefix("CrsGraph",
4733  "computeCrsPadding(same & permute)");
4734  std::ostringstream os;
4735  os << *prefix << "{numSameIDs: " << numSameIDs
4736  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
4737  << endl;
4738  std::cerr << os.str();
4739  }
4740 
4741  const int myRank = [&]() {
4742  auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4743  return comm.is_null() ? -1 : comm->getRank();
4744  }();
4745  std::unique_ptr<padding_type> padding(
4746  new padding_type(myRank, numSameIDs,
4747  permuteFromLIDs.extent(0)));
4748 
      // numSameIDs is narrowed from size_t to LO for the helper's signature.
4749  computeCrsPaddingForSameIDs(*padding, source,
4750  static_cast<LO>(numSameIDs));
4751  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
4752  permuteFromLIDs);
4753  return padding;
4754 }
4755 
// Record in `padding` the contribution of the "same" rows, i.e. local rows
// 0 .. numSameIDs-1, which use the same local index in source and target.
//
// \param padding     Padding object updated in place via update_same.
// \param source      Source row graph.
// \param numSameIDs  Number of leading local rows to process; the function
//                    returns immediately when it is 0.
//
// For each such row, the global column indices of the source row and of
// the corresponding target row are fetched with getRowGraphGlobalRow and
// handed to padding.update_same, along with flags saying whether each side
// is merged.  The source is treated as not merged whenever it is not a
// CrsGraph.  Progress messages are controlled by the verbose_ member.
4756 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4757 void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4758  computeCrsPaddingForSameIDs(
4759  padding_type& padding,
4760  const RowGraph<local_ordinal_type, global_ordinal_type,
4761  node_type>& source,
4762  const local_ordinal_type numSameIDs) const {
4763  using LO = local_ordinal_type;
4764  using GO = global_ordinal_type;
4765  using Details::Impl::getRowGraphGlobalRow;
4766  using std::endl;
4767  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
4768 
4769  std::unique_ptr<std::string> prefix;
4770  const bool verbose = verbose_;
4771  if (verbose) {
4772  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4773  std::ostringstream os;
4774  os << *prefix << "numSameIDs: " << numSameIDs << endl;
4775  std::cerr << os.str();
4776  }
4777 
4778  if (numSameIDs == 0) {
4779  return;
4780  }
4781 
4782  const map_type& srcRowMap = *(source.getRowMap());
4783  const map_type& tgtRowMap = *rowMap_;
4784  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4785  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4786  const bool src_is_unique =
4787  srcCrs == nullptr ? false : srcCrs->isMerged();
4788  const bool tgt_is_unique = this->isMerged();
4789 
      // Scratch buffers are declared outside the loop so that
      // getRowGraphGlobalRow can reuse them from row to row.
4790  std::vector<GO> srcGblColIndsScratch;
4791  std::vector<GO> tgtGblColIndsScratch;
4792 
4793  execute_sync_host_uvm_access(); // protect host UVM access
4794  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
      // The same local row index is translated through each graph's own row
      // Map to get that graph's global row index.
4795  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4796  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4797  auto srcGblColInds = getRowGraphGlobalRow(
4798  srcGblColIndsScratch, source, srcGblRowInd);
4799  auto tgtGblColInds = getRowGraphGlobalRow(
4800  tgtGblColIndsScratch, *this, tgtGblRowInd);
4801  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4802  tgtGblColInds.size(), tgt_is_unique,
4803  srcGblColInds.getRawPtr(),
4804  srcGblColInds.size(), src_is_unique);
4805  }
4806  if (verbose) {
4807  std::ostringstream os;
4808  os << *prefix << "Done" << endl;
4809  std::cerr << os.str();
4810  }
4811 }
4812 
// Record in `padding` the contribution of the permuted rows.
//
// \param padding          Padding object updated in place via
//                         update_permute.
// \param source           Source row graph.
// \param permuteToLIDs    Target local row indices; the function returns
//                         immediately when this is empty.
// \param permuteFromLIDs  Source local row indices, paired entry-by-entry
//                         with permuteToLIDs.
//
// For each pair i, the global column indices of source row
// permuteFromLIDs[i] and of target row permuteToLIDs[i] are fetched with
// getRowGraphGlobalRow and handed to padding.update_permute, along with
// flags saying whether each side is merged.  The source is treated as not
// merged whenever it is not a CrsGraph.  Both DualViews are asserted
// (TEUCHOS_ASSERT) not to need a host sync before their host views are
// read.  Progress messages are controlled by the verbose_ member.
4813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4814 void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4815  computeCrsPaddingForPermutedIDs(
4816  padding_type& padding,
4817  const RowGraph<local_ordinal_type, global_ordinal_type,
4818  node_type>& source,
4819  const Kokkos::DualView<const local_ordinal_type*,
4820  buffer_device_type>& permuteToLIDs,
4821  const Kokkos::DualView<const local_ordinal_type*,
4822  buffer_device_type>& permuteFromLIDs) const {
4823  using LO = local_ordinal_type;
4824  using GO = global_ordinal_type;
4825  using Details::Impl::getRowGraphGlobalRow;
4826  using std::endl;
4827  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
4828 
4829  std::unique_ptr<std::string> prefix;
4830  const bool verbose = verbose_;
4831  if (verbose) {
4832  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4833  std::ostringstream os;
4834  os << *prefix << "permuteToLIDs.extent(0): "
4835  << permuteToLIDs.extent(0)
4836  << ", permuteFromLIDs.extent(0): "
4837  << permuteFromLIDs.extent(0) << endl;
4838  std::cerr << os.str();
4839  }
4840 
4841  if (permuteToLIDs.extent(0) == 0) {
4842  return;
4843  }
4844 
4845  const map_type& srcRowMap = *(source.getRowMap());
4846  const map_type& tgtRowMap = *rowMap_;
4847  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4848  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4849  const bool src_is_unique =
4850  srcCrs == nullptr ? false : srcCrs->isMerged();
4851  const bool tgt_is_unique = this->isMerged();
4852 
4853  TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4854  auto permuteToLIDs_h = permuteToLIDs.view_host();
4855  TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4856  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4857 
4858  std::vector<GO> srcGblColIndsScratch;
4859  std::vector<GO> tgtGblColIndsScratch;
      // The loop bound comes from permuteToLIDs only; this function does not
      // itself check that permuteFromLIDs has the same length.
4860  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
4861 
4862  execute_sync_host_uvm_access(); // protect host UVM access
4863  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
4864  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4865  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4866  auto srcGblColInds = getRowGraphGlobalRow(
4867  srcGblColIndsScratch, source, srcGblRowInd);
4868  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4869  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4870  auto tgtGblColInds = getRowGraphGlobalRow(
4871  tgtGblColIndsScratch, *this, tgtGblRowInd);
4872  padding.update_permute(whichPermute, tgtLclRowInd,
4873  tgtGblColInds.getRawPtr(),
4874  tgtGblColInds.size(), tgt_is_unique,
4875  srcGblColInds.getRawPtr(),
4876  srcGblColInds.size(), src_is_unique);
4877  }
4878 
4879  if (verbose) {
4880  std::ostringstream os;
4881  os << *prefix << "Done" << endl;
4882  std::cerr << os.str();
4883  }
4884 }
4885 
4886 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4887 std::unique_ptr<
4888  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4889 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4890  computeCrsPaddingForImports(
4891  const Kokkos::DualView<const local_ordinal_type*,
4892  buffer_device_type>& importLIDs,
4893  Kokkos::DualView<packet_type*, buffer_device_type> imports,
4894  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4895  const bool verbose) const {
4896  using Details::Impl::getRowGraphGlobalRow;
4897  using std::endl;
4898  using LO = local_ordinal_type;
4899  using GO = global_ordinal_type;
4900  const char tfecfFuncName[] = "computeCrsPaddingForImports";
4901 
4902  std::unique_ptr<std::string> prefix;
4903  if (verbose) {
4904  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4905  std::ostringstream os;
4906  os << *prefix << "importLIDs.extent(0): "
4907  << importLIDs.extent(0)
4908  << ", imports.extent(0): "
4909  << imports.extent(0)
4910  << ", numPacketsPerLID.extent(0): "
4911  << numPacketsPerLID.extent(0) << endl;
4912  std::cerr << os.str();
4913  }
4914 
4915  const LO numImports = static_cast<LO>(importLIDs.extent(0));
4916  const int myRank = [&]() {
4917  auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4918  return comm.is_null() ? -1 : comm->getRank();
4919  }();
4920  std::unique_ptr<padding_type> padding(
4921  new padding_type(myRank, numImports));
4922 
4923  if (imports.need_sync_host()) {
4924  imports.sync_host();
4925  }
4926  auto imports_h = imports.view_host();
4927  if (numPacketsPerLID.need_sync_host()) {
4928  numPacketsPerLID.sync_host();
4929  }
4930  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
4931 
4932  TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4933  auto importLIDs_h = importLIDs.view_host();
4934 
4935  const map_type& tgtRowMap = *rowMap_;
4936  // Always merge source column indices, since isMerged() is
4937  // per-process state, and we don't know its value on other
4938  // processes that sent us data.
4939  constexpr bool src_is_unique = false;
4940  const bool tgt_is_unique = isMerged();
4941 
4942  std::vector<GO> tgtGblColIndsScratch;
4943  size_t offset = 0;
4944  execute_sync_host_uvm_access(); // protect host UVM access
4945  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
4946  // CrsGraph packs just global column indices, while CrsMatrix
4947  // packs bytes (first the number of entries in the row, then the
4948  // global column indices, then other stuff like the matrix
4949  // values in that row).
4950  const LO origSrcNumEnt =
4951  static_cast<LO>(numPacketsPerLID_h[whichImport]);
4952  GO* const srcGblColInds = imports_h.data() + offset;
4953 
4954  const LO tgtLclRowInd = importLIDs_h[whichImport];
4955  const GO tgtGblRowInd =
4956  tgtRowMap.getGlobalElement(tgtLclRowInd);
4957  auto tgtGblColInds = getRowGraphGlobalRow(
4958  tgtGblColIndsScratch, *this, tgtGblRowInd);
4959  const size_t origTgtNumEnt(tgtGblColInds.size());
4960 
4961  padding->update_import(whichImport, tgtLclRowInd,
4962  tgtGblColInds.getRawPtr(),
4963  origTgtNumEnt, tgt_is_unique,
4964  srcGblColInds,
4965  origSrcNumEnt, src_is_unique);
4966  offset += origSrcNumEnt;
4967  }
4968 
4969  if (verbose) {
4970  std::ostringstream os;
4971  os << *prefix << "Done" << endl;
4972  std::cerr << os.str();
4973  }
4974  return padding;
4975 }
4976 
4977 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4978 std::unique_ptr<
4979  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4980 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4981  computePaddingForCrsMatrixUnpack(
4982  const Kokkos::DualView<const local_ordinal_type*,
4983  buffer_device_type>& importLIDs,
4984  Kokkos::DualView<char*, buffer_device_type> imports,
4985  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4986  const bool verbose) const {
4987  using Details::PackTraits;
4988  using Details::Impl::getRowGraphGlobalRow;
4989  using std::endl;
4990  using LO = local_ordinal_type;
4991  using GO = global_ordinal_type;
4992  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
4993 
4994  std::unique_ptr<std::string> prefix;
4995  if (verbose) {
4996  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4997  std::ostringstream os;
4998  os << *prefix << "importLIDs.extent(0): "
4999  << importLIDs.extent(0)
5000  << ", imports.extent(0): "
5001  << imports.extent(0)
5002  << ", numPacketsPerLID.extent(0): "
5003  << numPacketsPerLID.extent(0) << endl;
5004  std::cerr << os.str();
5005  }
5006  const bool extraVerbose =
5007  verbose && Details::Behavior::verbose("CrsPadding");
5008 
5009  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5010  TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
5011  const int myRank = [&]() {
5012  auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5013  return comm.is_null() ? -1 : comm->getRank();
5014  }();
5015  std::unique_ptr<padding_type> padding(
5016  new padding_type(myRank, numImports));
5017 
5018  if (imports.need_sync_host()) {
5019  imports.sync_host();
5020  }
5021  auto imports_h = imports.view_host();
5022  if (numPacketsPerLID.need_sync_host()) {
5023  numPacketsPerLID.sync_host();
5024  }
5025  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5026 
5027  TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5028  auto importLIDs_h = importLIDs.view_host();
5029 
5030  const map_type& tgtRowMap = *rowMap_;
5031  // Always merge source column indices, since isMerged() is
5032  // per-process state, and we don't know its value on other
5033  // processes that sent us data.
5034  constexpr bool src_is_unique = false;
5035  const bool tgt_is_unique = isMerged();
5036 
5037  std::vector<GO> srcGblColIndsScratch;
5038  std::vector<GO> tgtGblColIndsScratch;
5039  size_t offset = 0;
5040  execute_sync_host_uvm_access(); // protect host UVM access
5041  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5042  // CrsGraph packs just global column indices, while CrsMatrix
5043  // packs bytes (first the number of entries in the row, then the
5044  // global column indices, then other stuff like the matrix
5045  // values in that row).
5046  const size_t numBytes = numPacketsPerLID_h[whichImport];
5047  if (extraVerbose) {
5048  std::ostringstream os;
5049  os << *prefix << "whichImport=" << whichImport
5050  << ", numImports=" << numImports
5051  << ", numBytes=" << numBytes << endl;
5052  std::cerr << os.str();
5053  }
5054  if (numBytes == 0) {
5055  continue; // special case: no entries to unpack for this row
5056  }
5057  LO origSrcNumEnt = 0;
5058  const size_t numEntBeg = offset;
5059  const size_t numEntLen =
5060  PackTraits<LO>::packValueCount(origSrcNumEnt);
5061  TEUCHOS_ASSERT(numBytes >= numEntLen);
5062  TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5063  PackTraits<LO>::unpackValue(origSrcNumEnt,
5064  imports_h.data() + numEntBeg);
5065  if (extraVerbose) {
5066  std::ostringstream os;
5067  os << *prefix << "whichImport=" << whichImport
5068  << ", numImports=" << numImports
5069  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5070  std::cerr << os.str();
5071  }
5072  TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5073  TEUCHOS_ASSERT(numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)));
5074  const size_t gidsBeg = numEntBeg + numEntLen;
5075  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5076  srcGblColIndsScratch.resize(origSrcNumEnt);
5077  }
5078  GO* const srcGblColInds = srcGblColIndsScratch.data();
5079  PackTraits<GO>::unpackArray(srcGblColInds,
5080  imports_h.data() + gidsBeg,
5081  origSrcNumEnt);
5082  const LO tgtLclRowInd = importLIDs_h[whichImport];
5083  const GO tgtGblRowInd =
5084  tgtRowMap.getGlobalElement(tgtLclRowInd);
5085  auto tgtGblColInds = getRowGraphGlobalRow(
5086  tgtGblColIndsScratch, *this, tgtGblRowInd);
5087  const size_t origNumTgtEnt(tgtGblColInds.size());
5088 
5089  if (extraVerbose) {
5090  std::ostringstream os;
5091  os << *prefix << "whichImport=" << whichImport
5092  << ", numImports=" << numImports
5093  << ": Call padding->update_import" << endl;
5094  std::cerr << os.str();
5095  }
5096  padding->update_import(whichImport, tgtLclRowInd,
5097  tgtGblColInds.getRawPtr(),
5098  origNumTgtEnt, tgt_is_unique,
5099  srcGblColInds,
5100  origSrcNumEnt, src_is_unique);
5101  offset += numBytes;
5102  }
5103 
5104  if (verbose) {
5105  std::ostringstream os;
5106  os << *prefix << "Done" << endl;
5107  std::cerr << os.str();
5108  }
5109  return padding;
5110 }
5111 
// Pack the rows of `source` listed in `exportLIDs` into `exports`, and
// record how many packets each row contributes in `numPacketsPerLID`.
//
// Checks performed first (each raises the stated exception type via
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   - std::runtime_error if exportLIDs and numPacketsPerLID differ in
//     extent(0);
//   - std::invalid_argument if `source` is not a row_graph_type;
//   - std::runtime_error if this graph is fill complete.
//
// One of three packing paths is then taken:
//   1. `source` is a RowGraph but not a CrsGraph: call RowGraph::pack
//      through Teuchos::ArrayView / Teuchos::Array, then deep-copy the
//      result into the host view of `exports`.
//   2. `source` is a CrsGraph and the column-Map / packed-row-pointer
//      condition below holds: call packCrsGraphNew.
//   3. Otherwise: call srcCrsGraphPtr->packFillActiveNew.
//
// NOTE(review): ProfilingRegion and packCrsGraphNew are used
// unqualified, and the embedded listing numbers skip 5123 and 5215;
// the declarations that bring those names into scope are presumably
// on those lines, which are not visible here.
5112 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5113 void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5114  packAndPrepare(const SrcDistObject& source,
5115  const Kokkos::DualView<const local_ordinal_type*,
5116  buffer_device_type>& exportLIDs,
5117  Kokkos::DualView<packet_type*,
5118  buffer_device_type>& exports,
5119  Kokkos::DualView<size_t*,
5120  buffer_device_type>
5121  numPacketsPerLID,
5122  size_t& constantNumPackets) {
5124  using GO = global_ordinal_type;
5125  using std::endl;
5126  using crs_graph_type =
5127  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
// tfecfFuncName is the function-name text for the
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
5128  const char tfecfFuncName[] = "packAndPrepare: ";
5129  ProfilingRegion region_papn("Tpetra::CrsGraph::packAndPrepare");
5130 
5131  const bool verbose = verbose_;
5132  std::unique_ptr<std::string> prefix;
5133  if (verbose) {
5134  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5135  std::ostringstream os;
5136  os << *prefix << "Start" << endl;
5137  std::cerr << os.str();
5138  }
5139 
5140  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5141  std::runtime_error,
5142  "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5143  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
5144  << ".");
5145  const row_graph_type* srcRowGraphPtr =
5146  dynamic_cast<const row_graph_type*>(&source);
5147  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr == nullptr, std::invalid_argument,
5148  "Source of an Export "
5149  "or Import operation to a CrsGraph must be a RowGraph with the same "
5150  "template parameters.");
5151  // We don't check whether src_graph has had fillComplete called,
5152  // because it doesn't matter whether the *source* graph has been
5153  // fillComplete'd. The target graph can not be fillComplete'd yet.
5154  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5155  "The target graph of an Import or Export must not be fill complete.");
5156 
// A null result here selects the generic RowGraph path below.
5157  const crs_graph_type* srcCrsGraphPtr =
5158  dynamic_cast<const crs_graph_type*>(&source);
5159 
5160  if (srcCrsGraphPtr == nullptr) {
5161  using Teuchos::ArrayView;
5162  using LO = local_ordinal_type;
5163 
5164  if (verbose) {
5165  std::ostringstream os;
5166  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5167  << endl;
5168  std::cerr << os.str();
5169  }
5170  // RowGraph::pack serves the "old" DistObject interface. It
5171  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5172  // entails deep-copying the exports buffer on output. RowGraph
5173  // is a convenience interface when not a CrsGraph, so we accept
5174  // the performance hit.
5175  TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5176  auto exportLIDs_h = exportLIDs.view_host();
5177  ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5178  exportLIDs_h.extent(0));
5179  Teuchos::Array<GO> exports_a;
5180 
// numPacketsPerLID is written on host through the ArrayView, so
// mark the host side as modified before handing it to pack().
5181  numPacketsPerLID.clear_sync_state();
5182  numPacketsPerLID.modify_host();
5183  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5184  ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5185  numPacketsPerLID_h.extent(0));
5186  srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5187  constantNumPackets);
// Reallocate exports only when its length differs from the packed
// length, then copy the packed data into its host view.
5188  const size_t newSize = static_cast<size_t>(exports_a.size());
5189  if (static_cast<size_t>(exports.extent(0)) != newSize) {
5190  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5191  exports = exports_dv_type("exports", newSize);
5192  }
5193  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5194  Kokkos::MemoryUnmanaged>
5195  exports_a_h(exports_a.getRawPtr(), newSize);
5196  exports.clear_sync_state();
5197  exports.modify_host();
5198  // DEEP_COPY REVIEW - NOT TESTED
5199  Kokkos::deep_copy(exports.view_host(), exports_a_h);
5200  }
5201  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
// NOTE(review): getColMap(), getRowPtrsPackedDevice() and getRowMap()
// are called on *this (the target, per the fill-complete check above),
// while the graph handed to packCrsGraphNew is *srcCrsGraphPtr --
// confirm that testing the target's state is intended.
5202  else if (!getColMap().is_null() &&
5203  (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5204  getRowMap()->getLocalNumElements() == 0)) {
5205  if (verbose) {
5206  std::ostringstream os;
5207  os << *prefix << "packCrsGraphNew path" << endl;
5208  std::cerr << os.str();
5209  }
5210  using export_pids_type =
5211  Kokkos::DualView<const int*, buffer_device_type>;
5212  export_pids_type exportPIDs; // not filling it; needed for syntax
5213  using LO = local_ordinal_type;
5214  using NT = node_type;
5216  packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5217  exports, numPacketsPerLID,
5218  constantNumPackets, false);
5219  } else {
// Remaining case: the source is a CrsGraph but the condition above
// does not hold, so let the source pack itself.
5220  srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5221  constantNumPackets);
5222  }
5223 
5224  if (verbose) {
5225  std::ostringstream os;
5226  os << *prefix << "Done" << endl;
5227  std::cerr << os.str();
5228  }
5229 }
5230 
// Pack the rows listed in `exportLIDs` into `exports` (the
// Teuchos::ArrayView / Teuchos::Array interface).
//
// If this graph has a column Map and either its packed row pointers
// are non-empty or its row Map has no local elements, packing is
// delegated to packCrsGraph; otherwise it is delegated to
// packFillActive.
//
// NOTE(review): the embedded listing numbers skip 5232 and 5240.  The
// line between the template header and `pack(` (presumably the return
// type and class qualifier) and one line just before the packCrsGraph
// call are not visible here.
5231 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5233  pack(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5234  Teuchos::Array<GlobalOrdinal>& exports,
5235  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5236  size_t& constantNumPackets) const {
5237  auto col_map = this->getColMap();
5238  // packCrsGraph requires k_rowPtrsPacked to be set
5239  if (!col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
5241  packCrsGraph<LocalOrdinal, GlobalOrdinal, Node>(*this, exports, numPacketsPerLID,
5242  exportLIDs, constantNumPackets);
5243  } else {
// Fallback when the condition required by packCrsGraph is not met.
5244  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5245  constantNumPackets);
5246  }
5247 }
5248 
5249 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5251  packFillActive(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5252  Teuchos::Array<GlobalOrdinal>& exports,
5253  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5254  size_t& constantNumPackets) const {
5255  using std::endl;
5256  using LO = LocalOrdinal;
5257  using GO = GlobalOrdinal;
5258  using host_execution_space =
5259  typename Kokkos::View<size_t*, device_type>::
5260  host_mirror_type::execution_space;
5261  const char tfecfFuncName[] = "packFillActive: ";
5262  const bool verbose = verbose_;
5263 
5264  const auto numExportLIDs = exportLIDs.size();
5265  std::unique_ptr<std::string> prefix;
5266  if (verbose) {
5267  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5268  std::ostringstream os;
5269  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5270  std::cerr << os.str();
5271  }
5272  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5273  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5274  " = "
5275  << numPacketsPerLID.size() << ".");
5276 
5277  const map_type& rowMap = *(this->getRowMap());
5278  const map_type* const colMapPtr = this->colMap_.getRawPtr();
5279  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5280  "This graph claims to be locally indexed, but its column Map is nullptr. "
5281  "This should never happen. Please report this bug to the Tpetra "
5282  "developers.");
5283 
5284  // We may pack different amounts of data for different rows.
5285  constantNumPackets = 0;
5286 
5287  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5288  // it might be now, but we might as well be safe).
5289  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5290  const LO* const exportLIDs_raw = exportLIDs.getRawPtr();
5291 
5292  // Count the total number of packets (column indices, in the case
5293  // of a CrsGraph) to pack. While doing so, set
5294  // numPacketsPerLID[i] to the number of entries owned by the
5295  // calling process in (local) row exportLIDs[i] of the graph, that
5296  // the caller wants us to send out.
5297  Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5298  size_t totalNumPackets = 0;
5299  size_t errCount = 0;
5300  // lambdas turn what they capture const, so we can't
5301  // atomic_add(&errCount,1). Instead, we need a View to modify.
5302  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5303  host_device_type;
5304  Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5305  constexpr size_t ONE = 1;
5306 
5307  execute_sync_host_uvm_access(); // protect host UVM access
5308  Kokkos::parallel_reduce(
5309  "Tpetra::CrsGraph::pack: totalNumPackets",
5310  inputRange,
5311  [=, *this](const LO& i, size_t& curTotalNumPackets) {
5312  const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5313  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5314  Kokkos::atomic_add(&errCountView(), ONE);
5315  numPacketsPerLID_raw[i] = 0;
5316  } else {
5317  const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5318  numPacketsPerLID_raw[i] = numEnt;
5319  curTotalNumPackets += numEnt;
5320  }
5321  },
5322  totalNumPackets);
5323 
5324  if (verbose) {
5325  std::ostringstream os;
5326  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5327  std::cerr << os.str();
5328  }
5329  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5330  "totalNumPackets count encountered "
5331  "one or more errors! errCount = "
5332  << errCount
5333  << ", totalNumPackets = " << totalNumPackets << ".");
5334  errCount = 0;
5335 
5336  // Allocate space for all the column indices to pack.
5337  exports.resize(totalNumPackets);
5338 
5339  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5340  "this->supportsRowViews() returns false; this should never happen. "
5341  "Please report this bug to the Tpetra developers.");
5342 
5343  // Loop again over the rows to export, and pack rows of indices
5344  // into the output buffer.
5345 
5346  if (verbose) {
5347  std::ostringstream os;
5348  os << *prefix << "Pack into exports" << endl;
5349  std::cerr << os.str();
5350  }
5351 
5352  // Teuchos::ArrayView may not be thread safe, or may not be
5353  // efficiently thread safe. Better to use the raw pointer.
5354  GO* const exports_raw = exports.getRawPtr();
5355  errCount = 0;
5356  Kokkos::parallel_scan("Tpetra::CrsGraph::pack: pack from views",
5357  inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5358  const size_t curOffset = exportsOffset;
5359  const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5360  const RowInfo rowInfo =
5361  this->getRowInfoFromGlobalRowIndex(gblRow);
5362 
5363  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5364  if (rowInfo.localRow == TDO::invalid()) {
5365  if (verbose) {
5366  std::ostringstream os;
5367  os << *prefix << ": INVALID rowInfo: i=" << i
5368  << ", lclRow=" << exportLIDs_raw[i] << endl;
5369  std::cerr << os.str();
5370  }
5371  Kokkos::atomic_add(&errCountView(), ONE);
5372  } else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5373  if (verbose) {
5374  std::ostringstream os;
5375  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5376  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5377  "(= "
5378  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5379  << ") > totalNumPackets (= " << totalNumPackets << ")."
5380  << endl;
5381  std::cerr << os.str();
5382  }
5383  Kokkos::atomic_add(&errCountView(), ONE);
5384  } else {
5385  const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5386  if (this->isLocallyIndexed()) {
5387  auto lclColInds = getLocalIndsViewHost(rowInfo);
5388  if (final) {
5389  for (LO k = 0; k < numEnt; ++k) {
5390  const LO lclColInd = lclColInds(k);
5391  const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5392  // Pack it, even if it's wrong. Let the receiving
5393  // process deal with it. Otherwise, we'll miss out
5394  // on any correct data.
5395  exports_raw[curOffset + k] = gblColInd;
5396  } // for each entry in the row
5397  } // final pass?
5398  exportsOffset = curOffset + numEnt;
5399  } else if (this->isGloballyIndexed()) {
5400  auto gblColInds = getGlobalIndsViewHost(rowInfo);
5401  if (final) {
5402  for (LO k = 0; k < numEnt; ++k) {
5403  const GO gblColInd = gblColInds(k);
5404  // Pack it, even if it's wrong. Let the receiving
5405  // process deal with it. Otherwise, we'll miss out
5406  // on any correct data.
5407  exports_raw[curOffset + k] = gblColInd;
5408  } // for each entry in the row
5409  } // final pass?
5410  exportsOffset = curOffset + numEnt;
5411  }
5412  // If neither globally nor locally indexed, then the graph
5413  // has no entries in this row (or indeed, in any row on this
5414  // process) to pack.
5415  }
5416  });
5417 
5418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5419  "Packing encountered "
5420  "one or more errors! errCount = "
5421  << errCount
5422  << ", totalNumPackets = " << totalNumPackets << ".");
5423 
5424  if (verbose) {
5425  std::ostringstream os;
5426  os << *prefix << "Done" << endl;
5427  std::cerr << os.str();
5428  }
5429 }
5430 
// Pack the column indices of the rows listed in `exportLIDs`, as
// global indices, into the host view of `exports` (Kokkos::DualView
// interface).
//
// Steps:
//   1. A serial loop stores each row's entry count in the host view
//      of numPacketsPerLID and accumulates totalNumPackets.  A row
//      that is not in the row Map is recorded in errCount, which
//      triggers std::logic_error after the loop.
//   2. `exports` is reallocated only if it holds fewer than
//      totalNumPackets entries.
//   3. A parallel_scan over the rows writes each row's global column
//      indices at the scan's running offset.
//
// `constantNumPackets` is set to 0.  std::runtime_error is raised if
// exportLIDs and numPacketsPerLID differ in extent(0);
// std::logic_error if the graph is locally indexed but has no column
// Map, or if supportsRowViews() is false.
//
// Errors detected inside the scan in step 3 are only counted and
// reported in verbose output; the exception check that follows the
// scan is commented out.
//
// NOTE(review): the embedded listing numbers skip 5569 and 5644, the
// lines immediately before and after the parallel_scan; whatever
// statements are on those lines are not visible here.
5431 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5432 void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5433  packFillActiveNew(const Kokkos::DualView<const local_ordinal_type*,
5434  buffer_device_type>& exportLIDs,
5435  Kokkos::DualView<packet_type*,
5436  buffer_device_type>& exports,
5437  Kokkos::DualView<size_t*,
5438  buffer_device_type>
5439  numPacketsPerLID,
5440  size_t& constantNumPackets) const {
5441  using std::endl;
5442  using LO = local_ordinal_type;
5443  using GO = global_ordinal_type;
5444  using host_execution_space = typename Kokkos::View<size_t*,
5445  device_type>::host_mirror_type::execution_space;
5446  using host_device_type =
5447  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5448  using exports_dv_type =
5449  Kokkos::DualView<packet_type*, buffer_device_type>;
// tfecfFuncName is the function-name text for the
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
5450  const char tfecfFuncName[] = "packFillActiveNew: ";
5451  const bool verbose = verbose_;
5452 
5453  const auto numExportLIDs = exportLIDs.extent(0);
5454  std::unique_ptr<std::string> prefix;
5455  if (verbose) {
5456  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5457  std::ostringstream os;
5458  os << *prefix << "numExportLIDs: " << numExportLIDs
5459  << ", numPacketsPerLID.extent(0): "
5460  << numPacketsPerLID.extent(0) << endl;
5461  std::cerr << os.str();
5462  }
5463  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5464  "exportLIDs.extent(0) = " << numExportLIDs
5465  << " != numPacketsPerLID.extent(0) = "
5466  << numPacketsPerLID.extent(0) << ".");
5467  TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5468  auto exportLIDs_h = exportLIDs.view_host();
5469 
5470  const map_type& rowMap = *(this->getRowMap());
5471  const map_type* const colMapPtr = this->colMap_.getRawPtr();
5472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5473  "This graph claims to be locally indexed, but its column Map is nullptr. "
5474  "This should never happen. Please report this bug to the Tpetra "
5475  "developers.");
5476 
5477  // We may pack different amounts of data for different rows.
5478  constantNumPackets = 0;
5479 
// The counts are written on host below, so mark the host side of
// numPacketsPerLID as modified.
5480  numPacketsPerLID.clear_sync_state();
5481  numPacketsPerLID.modify_host();
5482  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5483 
5484  // Count the total number of packets (column indices, in the case
5485  // of a CrsGraph) to pack. While doing so, set
5486  // numPacketsPerLID[i] to the number of entries owned by the
5487  // calling process in (local) row exportLIDs[i] of the graph, that
5488  // the caller wants us to send out.
5489  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5490  range_type inputRange(0, numExportLIDs);
5491  size_t totalNumPackets = 0;
5492  size_t errCount = 0;
5493  // lambdas turn what they capture const, so we can't
5494  // atomic_add(&errCount,1). Instead, we need a View to modify.
5495  Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5496  constexpr size_t ONE = 1;
5497 
5498  if (verbose) {
5499  std::ostringstream os;
5500  os << *prefix << "Compute totalNumPackets" << endl;
5501  std::cerr << os.str();
5502  }
5503 
5504  execute_sync_host_uvm_access(); // protect host UVM access
5505  totalNumPackets = 0;
// Counting pass: a plain serial loop over the rows to export.
5506  for (size_t i = 0; i < numExportLIDs; ++i) {
5507  const LO lclRow = exportLIDs_h[i];
5508  const GO gblRow = rowMap.getGlobalElement(lclRow);
5509  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5510  if (verbose) {
5511  std::ostringstream os;
5512  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5513  << " not in row Map on this process" << endl;
5514  std::cerr << os.str();
5515  }
5516  Kokkos::atomic_add(&errCountView(), ONE);
5517  numPacketsPerLID_h(i) = 0;
5518  } else {
5519  const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5520  numPacketsPerLID_h(i) = numEnt;
5521  totalNumPackets += numEnt;
5522  }
5523  }
5524 
5525  if (verbose) {
5526  std::ostringstream os;
5527  os << *prefix << "totalNumPackets: " << totalNumPackets
5528  << ", errCount: " << errCount << endl;
5529  std::cerr << os.str();
5530  }
5531  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5532  "totalNumPackets count encountered "
5533  "one or more errors! totalNumPackets: "
5534  << totalNumPackets
5535  << ", errCount: " << errCount << ".");
5536 
5537  // Allocate space for all the column indices to pack.
// An exports buffer that is already large enough is reused as is.
5538  if (size_t(exports.extent(0)) < totalNumPackets) {
5539  // FIXME (mfh 09 Apr 2019) Create without initializing.
5540  exports = exports_dv_type("exports", totalNumPackets);
5541  }
5542 
5543  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5544  "this->supportsRowViews() returns false; this should never happen. "
5545  "Please report this bug to the Tpetra developers.");
5546 
5547  // Loop again over the rows to export, and pack rows of indices
5548  // into the output buffer.
5549 
5550  if (verbose) {
5551  std::ostringstream os;
5552  os << *prefix << "Pack into exports buffer" << endl;
5553  std::cerr << os.str();
5554  }
5555 
5556  exports.clear_sync_state();
5557  exports.modify_host();
5558  auto exports_h = exports.view_host();
5559 
// Reset the counter so that it reflects only the packing pass.
5560  errCount = 0;
5561 
5562  // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5563  // (if locally indexed) or gblInds_wdv (if globally indexed).
5564  if (isLocallyIndexed())
5565  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5566  else if (isGloballyIndexed())
5567  gblInds_wdv.getHostView(Access::ReadOnly);
5568 
// Packing pass.  exportsOffset is the scan's running total, i.e. the
// position in exports_h where row i starts; entries are written only
// when `final` is true, but the offset is advanced either way.  The
// three error branches return without advancing the offset.
5570  Kokkos::parallel_scan("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5571  inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5572  const size_t curOffset = exportsOffset;
5573  const LO lclRow = exportLIDs_h(i);
5574  const GO gblRow = rowMap.getGlobalElement(lclRow);
5575  if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5576  if (verbose) {
5577  std::ostringstream os;
5578  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5579  << " not in row Map on this process" << endl;
5580  std::cerr << os.str();
5581  }
5582  Kokkos::atomic_add(&errCountView(), ONE);
5583  return;
5584  }
5585 
5586  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5587  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5588  if (verbose) {
5589  std::ostringstream os;
5590  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5591  << ", gblRow=" << gblRow << ": invalid rowInfo"
5592  << endl;
5593  std::cerr << os.str();
5594  }
5595  Kokkos::atomic_add(&errCountView(), ONE);
5596  return;
5597  }
5598 
// Guard against writing past the number of entries counted above.
5599  if (curOffset + rowInfo.numEntries > totalNumPackets) {
5600  if (verbose) {
5601  std::ostringstream os;
5602  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5603  << ", gblRow=" << gblRow << ", curOffset (= "
5604  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5605  << ") > totalNumPackets (= " << totalNumPackets
5606  << ")." << endl;
5607  std::cerr << os.str();
5608  }
5609  Kokkos::atomic_add(&errCountView(), ONE);
5610  return;
5611  }
5612 
5613  const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5614  if (this->isLocallyIndexed()) {
5615  auto lclColInds = getLocalIndsViewHost(rowInfo);
5616  if (final) {
5617  for (LO k = 0; k < numEnt; ++k) {
5618  const LO lclColInd = lclColInds(k);
5619  const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5620  // Pack it, even if it's wrong. Let the receiving
5621  // process deal with it. Otherwise, we'll miss out
5622  // on any correct data.
5623  exports_h(curOffset + k) = gblColInd;
5624  } // for each entry in the row
5625  } // final pass?
5626  exportsOffset = curOffset + numEnt;
5627  } else if (this->isGloballyIndexed()) {
5628  auto gblColInds = getGlobalIndsViewHost(rowInfo);
5629  if (final) {
5630  for (LO k = 0; k < numEnt; ++k) {
5631  const GO gblColInd = gblColInds(k);
5632  // Pack it, even if it's wrong. Let the receiving
5633  // process deal with it. Otherwise, we'll miss out
5634  // on any correct data.
5635  exports_h(curOffset + k) = gblColInd;
5636  } // for each entry in the row
5637  } // final pass?
5638  exportsOffset = curOffset + numEnt;
5639  }
5640  // If neither globally nor locally indexed, then the graph
5641  // has no entries in this row (or indeed, in any row on this
5642  // process) to pack.
5643  });
5645 
// The check below is disabled, so a nonzero errCount from the packing
// pass does not raise an exception; it only appears in the verbose
// message that follows.
5646  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5647  // (errCount != 0, std::logic_error, "Packing encountered "
5648  // "one or more errors! errCount = " << errCount
5649  // << ", totalNumPackets = " << totalNumPackets << ".");
5650 
5651  if (verbose) {
5652  std::ostringstream os;
5653  os << *prefix << "errCount=" << errCount << "; Done" << endl;
5654  std::cerr << os.str();
5655  }
5656 }
5657 
5658 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5660  unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
5661  buffer_device_type>& importLIDs,
5662  Kokkos::DualView<packet_type*,
5664  imports,
5665  Kokkos::DualView<size_t*,
5667  numPacketsPerLID,
5668  const size_t /* constantNumPackets */,
5669  const CombineMode /* combineMode */) {
5671  using std::endl;
5672  using LO = local_ordinal_type;
5673  using GO = global_ordinal_type;
5674  const char tfecfFuncName[] = "unpackAndCombine";
5675 
5676  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5677  const bool verbose = verbose_;
5678 
5679  std::unique_ptr<std::string> prefix;
5680  if (verbose) {
5681  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5682  std::ostringstream os;
5683  os << *prefix << "Start" << endl;
5684  std::cerr << os.str();
5685  }
5686  {
5687  auto padding = computeCrsPaddingForImports(
5688  importLIDs, imports, numPacketsPerLID, verbose);
5689  applyCrsPadding(*padding, verbose);
5690  if (verbose) {
5691  std::ostringstream os;
5692  os << *prefix << "Done computing & applying padding" << endl;
5693  std::cerr << os.str();
5694  }
5695  }
5696 
5697  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5698  // reasonable meaning, whether or not the matrix is fill complete.
5699  // It's just more work to implement.
5700 
5701  // We are not checking the value of the CombineMode input
5702  // argument. For CrsGraph, we only support import/export
5703  // operations if fillComplete has not yet been called. Any
5704  // incoming column-indices are inserted into the target graph. In
5705  // this context, CombineMode values of ADD vs INSERT are
5706  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5707  // duplicate column-index is inserted, it will be compressed out
5708  // when fillComplete is called.
5709  //
5710  // Note: I think REPLACE means that an existing row is replaced by
5711  // the imported row, i.e., the existing indices are cleared. CGB,
5712  // 6/17/2010
5713 
5714  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(importLIDs.extent(0) != numPacketsPerLID.extent(0),
5715  std::runtime_error, ": importLIDs.extent(0) = " << importLIDs.extent(0) << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0) << ".");
5716  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillComplete(), std::runtime_error,
5717  ": Import or Export operations are not allowed on a target "
5718  "CrsGraph that is fillComplete.");
5719 
5720  const size_t numImportLIDs(importLIDs.extent(0));
5721  if (numPacketsPerLID.need_sync_host()) {
5722  numPacketsPerLID.sync_host();
5723  }
5724  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5725  if (imports.need_sync_host()) {
5726  imports.sync_host();
5727  }
5728  auto imports_h = imports.view_host();
5729  TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5730  auto importLIDs_h = importLIDs.view_host();
5731 
5732  // If we're inserting in local indices, let's pre-allocate
5733  Teuchos::Array<LO> lclColInds;
5734  if (isLocallyIndexed()) {
5735  if (verbose) {
5736  std::ostringstream os;
5737  os << *prefix << "Preallocate local indices scratch" << endl;
5738  std::cerr << os.str();
5739  }
5740  size_t maxNumInserts = 0;
5741  for (size_t i = 0; i < numImportLIDs; ++i) {
5742  maxNumInserts = std::max(maxNumInserts, numPacketsPerLID_h[i]);
5743  }
5744  if (verbose) {
5745  std::ostringstream os;
5746  os << *prefix << "Local indices scratch size: "
5747  << maxNumInserts << endl;
5748  std::cerr << os.str();
5749  }
5750  lclColInds.resize(maxNumInserts);
5751  } else {
5752  if (verbose) {
5753  std::ostringstream os;
5754  os << *prefix;
5755  if (isGloballyIndexed()) {
5756  os << "Graph is globally indexed";
5757  } else {
5758  os << "Graph is neither locally nor globally indexed";
5759  }
5760  os << endl;
5761  std::cerr << os.str();
5762  }
5763  }
5764 
5765  TEUCHOS_ASSERT(!rowMap_.is_null());
5766  const map_type& rowMap = *rowMap_;
5767 
5768  try {
5769  size_t importsOffset = 0;
5770  for (size_t i = 0; i < numImportLIDs; ++i) {
5771  if (verbose) {
5772  std::ostringstream os;
5773  os << *prefix << "i=" << i << ", numImportLIDs="
5774  << numImportLIDs << endl;
5775  std::cerr << os.str();
5776  }
5777  // We can only unpack into owned rows, since we only have
5778  // local row indices.
5779  const LO lclRow = importLIDs_h[i];
5780  const GO gblRow = rowMap.getGlobalElement(lclRow);
5781  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
5782  std::logic_error, "importLIDs[i=" << i << "]=" << lclRow << " is not in the row Map on the calling "
5783  "process.");
5784  const LO numEnt = numPacketsPerLID_h[i];
5785  const GO* const gblColInds = (numEnt == 0) ? nullptr : imports_h.data() + importsOffset;
5786  if (!isLocallyIndexed()) {
5787  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
5788  } else {
5789  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
5790  // column Map filtering.
5791  for (LO j = 0; j < numEnt; j++) {
5792  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
5793  }
5794  insertLocalIndices(lclRow, numEnt, lclColInds.data());
5795  }
5796  importsOffset += numEnt;
5797  }
5798  } catch (std::exception& e) {
5799  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
5800  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5801  "exception: "
5802  << endl
5803  << e.what());
5804  }
5805 
5806  if (verbose) {
5807  std::ostringstream os;
5808  os << *prefix << "Done" << endl;
5809  std::cerr << os.str();
5810  }
5811 }
5812 
5813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5815  removeEmptyProcessesInPlace(const Teuchos::RCP<const map_type>& newMap) {
5816  using Teuchos::Comm;
5817  using Teuchos::null;
5818  using Teuchos::ParameterList;
5819  using Teuchos::RCP;
5820 
5821  // We'll set all the state "transactionally," so that this method
5822  // satisfies the strong exception guarantee. This object's state
5823  // won't be modified until the end of this method.
5824  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
5825  RCP<import_type> importer;
5826  RCP<export_type> exporter;
5827 
5828  rowMap = newMap;
5829  RCP<const Comm<int>> newComm =
5830  (newMap.is_null()) ? null : newMap->getComm();
5831 
5832  if (!domainMap_.is_null()) {
5833  if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
5834  // Common case: original domain and row Maps are identical.
5835  // In that case, we need only replace the original domain Map
5836  // with the new Map. This ensures that the new domain and row
5837  // Maps _stay_ identical.
5838  domainMap = newMap;
5839  } else {
5840  domainMap = domainMap_->replaceCommWithSubset(newComm);
5841  }
5842  }
5843  if (!rangeMap_.is_null()) {
5844  if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5845  // Common case: original range and row Maps are identical. In
5846  // that case, we need only replace the original range Map with
5847  // the new Map. This ensures that the new range and row Maps
5848  // _stay_ identical.
5849  rangeMap = newMap;
5850  } else {
5851  rangeMap = rangeMap_->replaceCommWithSubset(newComm);
5852  }
5853  }
5854  if (!colMap_.is_null()) {
5855  colMap = colMap_->replaceCommWithSubset(newComm);
5856  }
5857 
5858  // (Re)create the Export and / or Import if necessary.
5859  if (!newComm.is_null()) {
5860  RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
5861  //
5862  // The operations below are collective on the new communicator.
5863  //
5864  // (Re)create the Export object if necessary. If I haven't
5865  // called fillComplete yet, I don't have a rangeMap, so I must
5866  // first check if the _original_ rangeMap is not null. Ditto
5867  // for the Import object and the domain Map.
5868  if (!rangeMap_.is_null() &&
5869  rangeMap != rowMap &&
5870  !rangeMap->isSameAs(*rowMap)) {
5871  if (params.is_null() || !params->isSublist("Export")) {
5872  exporter = rcp(new export_type(rowMap, rangeMap));
5873  } else {
5874  RCP<ParameterList> exportSublist = sublist(params, "Export", true);
5875  exporter = rcp(new export_type(rowMap, rangeMap, exportSublist));
5876  }
5877  }
5878  // (Re)create the Import object if necessary.
5879  if (!domainMap_.is_null() &&
5880  domainMap != colMap &&
5881  !domainMap->isSameAs(*colMap)) {
5882  if (params.is_null() || !params->isSublist("Import")) {
5883  importer = rcp(new import_type(domainMap, colMap));
5884  } else {
5885  RCP<ParameterList> importSublist = sublist(params, "Import", true);
5886  importer = rcp(new import_type(domainMap, colMap, importSublist));
5887  }
5888  }
5889  } // if newComm is not null
5890 
5891  // Defer side effects until the end. If no destructors throw
5892  // exceptions (they shouldn't anyway), then this method satisfies
5893  // the strong exception guarantee.
5894  exporter_ = exporter;
5895  importer_ = importer;
5896  rowMap_ = rowMap;
5897  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5898  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5899  // the same object. We might want to get rid of this redundant
5900  // pointer sometime, but for now, we'll leave it alone and just
5901  // set map_ to the same object.
5902  this->map_ = rowMap;
5903  domainMap_ = domainMap;
5904  rangeMap_ = rangeMap;
5905  colMap_ = colMap;
5906 }
5907 
5908 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5910  getLocalDiagOffsets(const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const {
5911  using std::endl;
5912  using LO = LocalOrdinal;
5913  using GO = GlobalOrdinal;
5914  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5915  const bool verbose = verbose_;
5916 
5917  std::unique_ptr<std::string> prefix;
5918  if (verbose) {
5919  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
5920  std::ostringstream os;
5921  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
5922  << endl;
5923  std::cerr << os.str();
5924  }
5925 
5926  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
5927  const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
5928  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<LO>(offsets.extent(0)) < lclNumRows,
5929  std::invalid_argument, "offsets.extent(0) = " << offsets.extent(0) << " < getLocalNumRows() = " << lclNumRows << ".");
5930 
5931  const map_type& rowMap = *(this->getRowMap());
5932  const map_type& colMap = *(this->getColMap());
5933 
5934  // We only use these in debug mode, but since debug mode is a
5935  // run-time option, they need to exist here. That's why we create
5936  // the vector with explicit size zero, to avoid overhead if debug
5937  // mode is off.
5938  bool allRowMapDiagEntriesInColMap = true;
5939  bool allDiagEntriesFound = true;
5940  bool allOffsetsCorrect = true;
5941  bool noOtherWeirdness = true;
5942  using wrong_offsets_type = std::vector<std::pair<LO, size_t>>;
5943  wrong_offsets_type wrongOffsets(0);
5944 
5945  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5946  // the subset of Map functionality that we need below.
5947  auto lclRowMap = rowMap.getLocalMap();
5948  auto lclColMap = colMap.getLocalMap();
5949 
5950  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5951  // setup, at least on the host. For CUDA, we have to use LocalMap
5952  // (that comes from each of the two Maps).
5953 
5954  const bool sorted = this->isSorted();
5955  if (isFillComplete()) {
5956  auto lclGraph = this->getLocalGraphDevice();
5957  ::Tpetra::Details::getGraphDiagOffsets(offsets, lclRowMap, lclColMap,
5958  lclGraph.row_map,
5959  lclGraph.entries, sorted);
5960  } else {
5961  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
5962  // since the graph is not fill complete. The previous version
5963  // of this code assumed UVM; this version does not.
5964  auto offsets_h = Kokkos::create_mirror_view(offsets);
5965 
5966  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5967  // Find the diagonal entry. Since the row Map and column Map
5968  // may differ, we have to compare global row and column
5969  // indices, not local.
5970  const GO gblRowInd = lclRowMap.getGlobalElement(lclRowInd);
5971  const GO gblColInd = gblRowInd;
5972  const LO lclColInd = lclColMap.getLocalElement(gblColInd);
5973 
5974  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
5975  allRowMapDiagEntriesInColMap = false;
5976  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
5977  } else {
5978  const RowInfo rowInfo = this->getRowInfo(lclRowInd);
5979  if (static_cast<LO>(rowInfo.localRow) == lclRowInd &&
5980  rowInfo.numEntries > 0) {
5981  auto colInds = this->getLocalIndsViewHost(rowInfo);
5982  const size_t hint = 0; // not needed for this algorithm
5983  const size_t offset =
5984  KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
5985  lclColInd, hint, sorted);
5986  offsets_h(lclRowInd) = offset;
5987 
5988  if (debug_) {
5989  // Now that we have what we think is an offset, make sure
5990  // that it really does point to the diagonal entry. Offsets
5991  // are _relative_ to each row, not absolute (for the whole
5992  // (local) graph).
5993  typename local_inds_dualv_type::t_host::const_type lclColInds;
5994  try {
5995  lclColInds = this->getLocalIndsViewHost(rowInfo);
5996  } catch (...) {
5997  noOtherWeirdness = false;
5998  }
5999  // Don't continue with error checking if the above failed.
6000  if (noOtherWeirdness) {
6001  const size_t numEnt = lclColInds.extent(0);
6002  if (offset >= numEnt) {
6003  // Offsets are relative to each row, so this means that
6004  // the offset is out of bounds.
6005  allOffsetsCorrect = false;
6006  wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6007  } else {
6008  const LO actualLclColInd = lclColInds(offset);
6009  const GO actualGblColInd = lclColMap.getGlobalElement(actualLclColInd);
6010  if (actualGblColInd != gblColInd) {
6011  allOffsetsCorrect = false;
6012  wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6013  }
6014  }
6015  }
6016  } // debug_
6017  } else { // either row is empty, or something went wrong w/ getRowInfo()
6018  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
6019  allDiagEntriesFound = false;
6020  }
6021  } // whether lclColInd is a valid local column index
6022  } // for each local row
6023  // DEEP_COPY REVIEW - NOT TESTED
6024  Kokkos::deep_copy(offsets, offsets_h);
6025  } // whether the graph is fill complete
6026 
6027  if (verbose && wrongOffsets.size() != 0) {
6028  std::ostringstream os;
6029  os << *prefix << "Wrong offsets: [";
6030  for (size_t k = 0; k < wrongOffsets.size(); ++k) {
6031  os << "(" << wrongOffsets[k].first << ","
6032  << wrongOffsets[k].second << ")";
6033  if (k + 1 < wrongOffsets.size()) {
6034  os << ", ";
6035  }
6036  }
6037  os << "]" << endl;
6038  std::cerr << os.str();
6039  }
6040 
6041  if (debug_) {
6042  using std::endl;
6043  using Teuchos::reduceAll;
6044  Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
6045  const bool localSuccess =
6046  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6047  const int numResults = 5;
6048  int lclResults[5];
6049  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6050  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6051  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6052  lclResults[3] = noOtherWeirdness ? 1 : 0;
6053  // min-all-reduce will compute least rank of all the processes
6054  // that didn't succeed.
6055  lclResults[4] = !localSuccess ? comm->getRank() : comm->getSize();
6056 
6057  int gblResults[5];
6058  gblResults[0] = 0;
6059  gblResults[1] = 0;
6060  gblResults[2] = 0;
6061  gblResults[3] = 0;
6062  gblResults[4] = 0;
6063  reduceAll<int, int>(*comm, Teuchos::REDUCE_MIN,
6064  numResults, lclResults, gblResults);
6065 
6066  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1 || gblResults[3] != 1) {
6067  std::ostringstream os; // build error message
6068  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6069  "possibly among others): "
6070  << endl;
6071  if (gblResults[0] == 0) {
6072  os << " - The column Map does not contain at least one diagonal entry "
6073  "of the graph."
6074  << endl;
6075  }
6076  if (gblResults[1] == 0) {
6077  os << " - On one or more processes, some row does not contain a "
6078  "diagonal entry."
6079  << endl;
6080  }
6081  if (gblResults[2] == 0) {
6082  os << " - On one or more processes, some offsets are incorrect."
6083  << endl;
6084  }
6085  if (gblResults[3] == 0) {
6086  os << " - One or more processes had some other error."
6087  << endl;
6088  }
6089  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6090  }
6091  } // debug_
6092 }
6093 
6094 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6096  getLocalOffRankOffsets(offset_device_view_type& offsets) const {
6097  using std::endl;
6098  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6099  const bool verbose = verbose_;
6100 
6101  std::unique_ptr<std::string> prefix;
6102  if (verbose) {
6103  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6104  std::ostringstream os;
6105  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6106  << endl;
6107  std::cerr << os.str();
6108  }
6109 
6110  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
6111  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6112 
6113  const size_t lclNumRows = this->getLocalNumRows();
6114 
6115  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows + 1) {
6116  offsets = k_offRankOffsets_;
6117  return;
6118  }
6119  haveLocalOffRankOffsets_ = false;
6120 
6121  const map_type& colMap = *(this->getColMap());
6122  const map_type& domMap = *(this->getDomainMap());
6123 
6124  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6125  // the subset of Map functionality that we need below.
6126  auto lclColMap = colMap.getLocalMap();
6127  auto lclDomMap = domMap.getLocalMap();
6128 
6129  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6130  // setup, at least on the host. For CUDA, we have to use LocalMap
6131  // (that comes from each of the two Maps).
6132 
6133  TEUCHOS_ASSERT(this->isSorted());
6134  if (isFillComplete()) {
6135  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows + 1);
6136  auto lclGraph = this->getLocalGraphDevice();
6137  ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
6138  lclColMap, lclDomMap,
6139  lclGraph);
6140  offsets = k_offRankOffsets_;
6141  haveLocalOffRankOffsets_ = true;
6142  } else {
6143  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6144  }
6145 }
6146 
6147 namespace { // (anonymous)
6148 
6149 // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6150 // below). The point is to avoid the deep copy between the input
6151 // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6152 // can't use UVM to avoid the deep copy with CUDA, because the
6153 // ArrayRCP is a host pointer, while the input to the graph's
6154 // getLocalDiagOffsets method is a device pointer. Assigning a
6155 // host pointer to a device pointer is incorrect unless the host
6156 // pointer points to host pinned memory. The goal is to get rid
6157 // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6158 // copy for backwards compatibility.
6159 //
6160 // We have to use template magic because
6161 // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6162 // if device_type::memory_space is not Kokkos::HostSpace (as is
6163 // the case with CUDA).
6164 
6165 template <class DeviceType,
6166  const bool memSpaceIsHostSpace =
6167  std::is_same<typename DeviceType::memory_space,
6168  Kokkos::HostSpace>::value>
6169 struct HelpGetLocalDiagOffsets {};
6170 
6171 template <class DeviceType>
6172 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6173  typedef DeviceType device_type;
6174  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6175  Kokkos::MemoryUnmanaged>
6176  device_offsets_type;
6177  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6178  Kokkos::MemoryUnmanaged>
6179  host_offsets_type;
6180 
6181  static device_offsets_type
6182  getDeviceOffsets(const host_offsets_type& hostOffsets) {
6183  // Host and device are the same; no need to allocate a
6184  // temporary device View.
6185  return hostOffsets;
6186  }
6187 
6188  static void
6189  copyBackIfNeeded(const host_offsets_type& /* hostOffsets */,
6190  const device_offsets_type& /* deviceOffsets */) { /* copy back not needed; host and device are the same */
6191  }
6192 };
6193 
6194 template <class DeviceType>
6195 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6196  typedef DeviceType device_type;
6197  // We have to do a deep copy, since host memory space != device
6198  // memory space. Thus, the device View is managed (we need to
6199  // allocate a temporary device View).
6200  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6201  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6202  Kokkos::MemoryUnmanaged>
6203  host_offsets_type;
6204 
6205  static device_offsets_type
6206  getDeviceOffsets(const host_offsets_type& hostOffsets) {
6207  // Host memory space != device memory space, so we must
6208  // allocate a temporary device View for the graph.
6209  return device_offsets_type("offsets", hostOffsets.extent(0));
6210  }
6211 
6212  static void
6213  copyBackIfNeeded(const host_offsets_type& hostOffsets,
6214  const device_offsets_type& deviceOffsets) {
6215  // DEEP_COPY REVIEW - NOT TESTED
6216  Kokkos::deep_copy(hostOffsets, deviceOffsets);
6217  }
6218 };
6219 } // namespace
6220 
6221 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6223  getLocalDiagOffsets(Teuchos::ArrayRCP<size_t>& offsets) const {
6224  typedef LocalOrdinal LO;
6225  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6226  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::runtime_error,
6227  "The graph does not yet have a column Map.");
6228  const LO myNumRows = static_cast<LO>(this->getLocalNumRows());
6229  if (static_cast<LO>(offsets.size()) != myNumRows) {
6230  // NOTE (mfh 21 Jan 2016) This means that the method does not
6231  // satisfy the strong exception guarantee (no side effects
6232  // unless successful).
6233  offsets.resize(myNumRows);
6234  }
6235 
6236  // mfh 21 Jan 2016: This method unfortunately takes a
6237  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6238  // device pointer. We can't access host memory from the device;
6239  // that's the wrong direction for UVM. (It's the right direction
6240  // for inefficient host pinned memory, but we don't want to use
6241  // that here.) Thus, if device memory space != host memory space,
6242  // we allocate and use a temporary device View to get the offsets.
6243  // If the two spaces are equal, the template magic makes the deep
6244  // copy go away.
6245  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6246  typedef typename helper_type::host_offsets_type host_offsets_type;
6247  // Unmanaged host View that views the output array.
6248  host_offsets_type hostOffsets(offsets.getRawPtr(), myNumRows);
6249  // Allocate temp device View if host != device, else reuse host array.
6250  auto deviceOffsets = helper_type::getDeviceOffsets(hostOffsets);
6251  // NOT recursion; this calls the overload that takes a device View.
6252  this->getLocalDiagOffsets(deviceOffsets);
6253  helper_type::copyBackIfNeeded(hostOffsets, deviceOffsets);
6254 }
6255 
// NOTE(review): the lines that carry this definition's return type, name and
// parameter list are missing from this listing (its embedded numbering jumps
// from 6256 to 6259).  Presumably this is checkSizes(), which is called
// elsewhere in this file -- confirm against the original header.
// The only visible statement returns true unconditionally.
6256 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6259  return true;
6260 }
6261 
6262 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6265  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6266  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>& domainTransfer,
6267  const Teuchos::RCP<const map_type>& domainMap,
6268  const Teuchos::RCP<const map_type>& rangeMap,
6269  const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6270  using Teuchos::ArrayRCP;
6271  using Teuchos::ArrayView;
6272  using Teuchos::Comm;
6273  using Teuchos::ParameterList;
6274  using Teuchos::rcp;
6275  using Teuchos::RCP;
6280 #ifdef HAVE_TPETRA_MMM_TIMINGS
6281  using std::string;
6282  using Teuchos::TimeMonitor;
6283 #endif
6284 
6285  using LO = LocalOrdinal;
6286  using GO = GlobalOrdinal;
6287  using NT = node_type;
6288  using this_CRS_type = CrsGraph<LO, GO, NT>;
6289  using ivector_type = Vector<int, LO, GO, NT>;
6290 
6291  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6292 
6293 #ifdef HAVE_TPETRA_MMM_TIMINGS
6294  string label;
6295  if (!params.is_null()) label = params->get("Timer Label", label);
6296  string prefix2 = string("Tpetra ") + label + std::string(": CrsGraph TAFC ");
6297  RCP<TimeMonitor> MM =
6298  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Pack-1"))));
6299 #endif
6300 
6301  // Make sure that the input argument rowTransfer is either an
6302  // Import or an Export. Import and Export are the only two
6303  // subclasses of Transfer that we defined, but users might
6304  // (unwisely, for now at least) decide to implement their own
6305  // subclasses. Exclude this possibility.
6306  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6307  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6308  TEUCHOS_TEST_FOR_EXCEPTION(
6309  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6310  prefix << "The 'rowTransfer' input argument must be either an Import or "
6311  "an Export, and its template parameters must match the corresponding "
6312  "template parameters of the CrsGraph.");
6313 
6314  // Make sure that the input argument domainTransfer is either an
6315  // Import or an Export. Import and Export are the only two
6316  // subclasses of Transfer that we defined, but users might
6317  // (unwisely, for now at least) decide to implement their own
6318  // subclasses. Exclude this possibility.
6319  Teuchos::RCP<const import_type> xferDomainAsImport =
6320  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6321  Teuchos::RCP<const export_type> xferDomainAsExport =
6322  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6323 
6324  if (!domainTransfer.is_null()) {
6325  TEUCHOS_TEST_FOR_EXCEPTION(
6326  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6327  prefix << "The 'domainTransfer' input argument must be either an "
6328  "Import or an Export, and its template parameters must match the "
6329  "corresponding template parameters of the CrsGraph.");
6330 
6331  TEUCHOS_TEST_FOR_EXCEPTION(
6332  (xferAsImport != nullptr || !xferDomainAsImport.is_null()) &&
6333  ((xferAsImport != nullptr && xferDomainAsImport.is_null()) ||
6334  (xferAsImport == nullptr && !xferDomainAsImport.is_null())),
6335  std::invalid_argument,
6336  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6337  "must be of the same type (either Import or Export).");
6338 
6339  TEUCHOS_TEST_FOR_EXCEPTION(
6340  (xferAsExport != nullptr || !xferDomainAsExport.is_null()) &&
6341  ((xferAsExport != nullptr && xferDomainAsExport.is_null()) ||
6342  (xferAsExport == nullptr && !xferDomainAsExport.is_null())),
6343  std::invalid_argument,
6344  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6345  "must be of the same type (either Import or Export).");
6346 
6347  } // domainTransfer != null
6348 
6349  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6350  // if the source Map is not distributed but the target Map is?
6351  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6352 
6353  //
6354  // Get the caller's parameters
6355  //
6356 
6357  bool reverseMode = false; // Are we in reverse mode?
6358  bool restrictComm = false; // Do we need to restrict the communicator?
6359  RCP<ParameterList> graphparams; // parameters for the destination graph
6360  if (!params.is_null()) {
6361  reverseMode = params->get("Reverse Mode", reverseMode);
6362  restrictComm = params->get("Restrict Communicator", restrictComm);
6363  graphparams = sublist(params, "CrsGraph");
6364  }
6365 
6366  // Get the new domain and range Maps. We need some of them for error
6367  // checking, now that we have the reverseMode parameter.
6368  RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6369  RCP<const map_type> MyColMap; // create this below
6370  RCP<const map_type> MyDomainMap = !domainMap.is_null() ? domainMap : getDomainMap();
6371  RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
6372  RCP<const map_type> BaseRowMap = MyRowMap;
6373  RCP<const map_type> BaseDomainMap = MyDomainMap;
6374 
6375  // If the user gave us a nonnull destGraph, then check whether it's
6376  // "pristine." That means that it has no entries.
6377  //
6378  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6379  // then this exception test may hang. It would be better to
6380  // forward an error flag to the next communication phase.
6381  if (!destGraph.is_null()) {
6382  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6383  // whether a graph or matrix has no entries on the calling
6384  // process, is that it is neither locally nor globally indexed.
6385  // This may change eventually with the Kokkos refactor version
6386  // of Tpetra, so it would be better just to check the quantity
6387  // of interest directly. Note that with the Kokkos refactor
6388  // version of Tpetra, asking for the total number of entries in
6389  // a graph or matrix that is not fill complete might require
6390  // computation (kernel launch), since it is not thread scalable
6391  // to update a count every time an entry is inserted.
6392  const bool NewFlag =
6393  !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6394  TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6395  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6396  "if its graph is empty (neither locally nor globally indexed).");
6397 
6398  // FIXME (mfh 15 May 2014) At some point, we want to change
6399  // graphs and matrices so that their DistObject Map
6400  // (this->getMap()) may differ from their row Map. This will
6401  // make redistribution for 2-D distributions more efficient. I
6402  // hesitate to change this check, because I'm not sure how much
6403  // the code here depends on getMap() and getRowMap() being the
6404  // same.
6405  TEUCHOS_TEST_FOR_EXCEPTION(
6406  !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6407  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6408  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6409 
6410  TEUCHOS_TEST_FOR_EXCEPTION(
6411  !destGraph->checkSizes(*this), std::invalid_argument,
6412  prefix << "You provided a nonnull destination graph, but checkSizes() "
6413  "indicates that it is not a legal legal target for redistribution from "
6414  "the source graph (*this). This may mean that they do not have the "
6415  "same dimensions.");
6416  }
6417 
6418  // If forward mode (the default), then *this's (row) Map must be
6419  // the same as the source Map of the Transfer. If reverse mode,
6420  // then *this's (row) Map must be the same as the target Map of
6421  // the Transfer.
6422  //
6423  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6424  // and matrices so that their DistObject Map (this->getMap()) may
6425  // differ from their row Map. This will make redistribution for
6426  // 2-D distributions more efficient. I hesitate to change this
6427  // check, because I'm not sure how much the code here depends on
6428  // getMap() and getRowMap() being the same.
6429  TEUCHOS_TEST_FOR_EXCEPTION(
6430  !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6431  std::invalid_argument, prefix << "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6432 
6433  TEUCHOS_TEST_FOR_EXCEPTION(
6434  !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6435  std::invalid_argument, prefix << "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6436 
6437  // checks for domainTransfer
6438  TEUCHOS_TEST_FOR_EXCEPTION(
6439  !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6440  std::invalid_argument,
6441  prefix << "The target map of the 'domainTransfer' input argument must be "
6442  "the same as the rebalanced domain map 'domainMap'");
6443 
6444  TEUCHOS_TEST_FOR_EXCEPTION(
6445  !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6446  std::invalid_argument,
6447  prefix << "The source map of the 'domainTransfer' input argument must be "
6448  "the same as the rebalanced domain map 'domainMap'");
6449 
6450  // The basic algorithm here is:
6451  //
6452  // 1. Call the moral equivalent of "Distor.do" to handle the import.
6453  // 2. Copy all the Imported and Copy/Permuted data into the raw
6454  // CrsGraph pointers, still using GIDs.
6455  // 3. Call an optimized version of MakeColMap that avoids the
6456  // Directory lookups (since the importer knows who owns all the
6457  // GIDs) AND reindexes to LIDs.
6458  // 4. Call expertStaticFillComplete()
6459 
6460  // Get information from the Importer
6461  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6462  ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6463  ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6464  ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6465  ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6466  Distributor& Distor = rowTransfer.getDistributor();
6467 
6468  // Owning PIDs
6469  Teuchos::Array<int> SourcePids;
6470  Teuchos::Array<int> TargetPids;
6471  int MyPID = getComm()->getRank();
6472 
6473  // Temp variables for sub-communicators
6474  RCP<const map_type> ReducedRowMap, ReducedColMap,
6475  ReducedDomainMap, ReducedRangeMap;
6476  RCP<const Comm<int>> ReducedComm;
6477 
6478  // If the user gave us a null destGraph, then construct the new
6479  // destination graph. We will replace its column Map later.
6480  if (destGraph.is_null()) {
6481  destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6482  }
6483 
6484  /***************************************************/
6485  /***** 1) First communicator restriction phase ****/
6486  /***************************************************/
6487  if (restrictComm) {
6488  ReducedRowMap = MyRowMap->removeEmptyProcesses();
6489  ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6490  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6491 
6492  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6493  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6494 
6495  // Reset the "my" maps
6496  MyRowMap = ReducedRowMap;
6497  MyDomainMap = ReducedDomainMap;
6498  MyRangeMap = ReducedRangeMap;
6499 
6500  // Update my PID, if we've restricted the communicator
6501  if (!ReducedComm.is_null()) {
6502  MyPID = ReducedComm->getRank();
6503  } else {
6504  MyPID = -2; // For debugging
6505  }
6506  } else {
6507  ReducedComm = MyRowMap->getComm();
6508  }
6509 
6510  /***************************************************/
6511  /***** 2) From Tpetra::DistObject::doTransfer() ****/
6512  /***************************************************/
6513 #ifdef HAVE_TPETRA_MMM_TIMINGS
6514  MM = Teuchos::null;
6515  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("ImportSetup"))));
6516 #endif
6517  // Get the owning PIDs
6518  RCP<const import_type> MyImporter = getImporter();
6519 
6520  // check whether domain maps of source graph and base domain map is the same
6521  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6522 
6523  if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6524  // Same domain map as source graph
6525  //
6526  // NOTE: This won't work for restrictComm (because the Import
6527  // doesn't know the restricted PIDs), though writing an
6528  // optimized version for that case would be easy (Import an
6529  // IntVector of the new PIDs). Might want to add this later.
6530  Import_Util::getPids(*MyImporter, SourcePids, false);
6531  } else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6532  // Same domain map as source graph (restricted communicator)
6533  // We need one import from the domain to the column map
6534  ivector_type SourceDomain_pids(getDomainMap(), true);
6535  ivector_type SourceCol_pids(getColMap());
6536  // SourceDomain_pids contains the restricted pids
6537  SourceDomain_pids.putScalar(MyPID);
6538 
6539  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6540  SourcePids.resize(getColMap()->getLocalNumElements());
6541  SourceCol_pids.get1dCopy(SourcePids());
6542  } else if (MyImporter.is_null() && bSameDomainMap) {
6543  // Graph has no off-process entries
6544  SourcePids.resize(getColMap()->getLocalNumElements());
6545  SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6546  } else if (!MyImporter.is_null() &&
6547  !domainTransfer.is_null()) {
6548  // general implementation for rectangular matrices with
6549  // domain map different than SourceGraph domain map.
6550  // User has to provide a DomainTransfer object. We need
6551  // two communications (import/export)
6552 
6553  // TargetDomain_pids lives on the rebalanced new domain map
6554  ivector_type TargetDomain_pids(domainMap);
6555  TargetDomain_pids.putScalar(MyPID);
6556 
6557  // SourceDomain_pids lives on the non-rebalanced old domain map
6558  ivector_type SourceDomain_pids(getDomainMap());
6559 
6560  // SourceCol_pids lives on the non-rebalanced old column map
6561  ivector_type SourceCol_pids(getColMap());
6562 
6563  if (!reverseMode && !xferDomainAsImport.is_null()) {
6564  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6565  } else if (reverseMode && !xferDomainAsExport.is_null()) {
6566  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6567  } else if (!reverseMode && !xferDomainAsExport.is_null()) {
6568  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6569  } else if (reverseMode && !xferDomainAsImport.is_null()) {
6570  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6571  } else {
6572  TEUCHOS_TEST_FOR_EXCEPTION(
6573  true, std::logic_error,
6574  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6575  }
6576  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6577  SourcePids.resize(getColMap()->getLocalNumElements());
6578  SourceCol_pids.get1dCopy(SourcePids());
6579  } else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6580  getDomainMap()->isSameAs(*getRowMap())) {
6581  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6582  ivector_type TargetRow_pids(domainMap);
6583  ivector_type SourceRow_pids(getRowMap());
6584  ivector_type SourceCol_pids(getColMap());
6585 
6586  TargetRow_pids.putScalar(MyPID);
6587  if (!reverseMode && xferAsImport != nullptr) {
6588  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6589  } else if (reverseMode && xferAsExport != nullptr) {
6590  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6591  } else if (!reverseMode && xferAsExport != nullptr) {
6592  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6593  } else if (reverseMode && xferAsImport != nullptr) {
6594  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6595  } else {
6596  TEUCHOS_TEST_FOR_EXCEPTION(
6597  true, std::logic_error,
6598  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6599  }
6600  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6601  SourcePids.resize(getColMap()->getLocalNumElements());
6602  SourceCol_pids.get1dCopy(SourcePids());
6603  } else {
6604  TEUCHOS_TEST_FOR_EXCEPTION(
6605  true, std::invalid_argument,
6606  prefix << "This method only allows either domainMap == getDomainMap(), "
6607  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6608  }
6609 
6610  // Tpetra-specific stuff
6611  size_t constantNumPackets = destGraph->constantNumberOfPackets();
6612  if (constantNumPackets == 0) {
6613  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6614  RemoteLIDs.size());
6615  } else {
6616  // There are a constant number of packets per element. We
6617  // already know (from the number of "remote" (incoming)
6618  // elements) how many incoming elements we expect, so we can
6619  // resize the buffer accordingly.
6620  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6621  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
6622  }
6623 
6624  {
6625  // packAndPrepare* methods modify numExportPacketsPerLID_.
6626  destGraph->numExportPacketsPerLID_.modify_host();
6627  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6628  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6629 
6630  // Pack & Prepare w/ owning PIDs
6631  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6632  numExportPacketsPerLID, ExportLIDs,
6633  SourcePids, constantNumPackets);
6634  }
6635 
6636  // Do the exchange of remote data.
6637 #ifdef HAVE_TPETRA_MMM_TIMINGS
6638  MM = Teuchos::null;
6639  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Transfer"))));
6640 #endif
6641 
6642  if (communication_needed) {
6643  if (reverseMode) {
6644  if (constantNumPackets == 0) { // variable number of packets per LID
6645  // Make sure that host has the latest version, since we're
6646  // using the version on host. If host has the latest
6647  // version, syncing to host does nothing.
6648  destGraph->numExportPacketsPerLID_.sync_host();
6649  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6650  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6651  destGraph->numImportPacketsPerLID_.sync_host();
6652  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6653  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6654 
6655  Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6656  destGraph->numImportPacketsPerLID_.view_host());
6657  size_t totalImportPackets = 0;
6658  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6659  totalImportPackets += numImportPacketsPerLID[i];
6660  }
6661 
6662  // Reallocation MUST go before setting the modified flag,
6663  // because it may clear out the flags.
6664  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6665  destGraph->imports_.modify_host();
6666  auto hostImports = destGraph->imports_.view_host();
6667  // This is a legacy host pack/unpack path, so use the host
6668  // version of exports_.
6669  destGraph->exports_.sync_host();
6670  auto hostExports = destGraph->exports_.view_host();
6671  Distor.doReversePostsAndWaits(hostExports,
6672  numExportPacketsPerLID,
6673  hostImports,
6674  numImportPacketsPerLID);
6675  } else { // constant number of packets per LI
6676  destGraph->imports_.modify_host();
6677  auto hostImports = destGraph->imports_.view_host();
6678  // This is a legacy host pack/unpack path, so use the host
6679  // version of exports_.
6680  destGraph->exports_.sync_host();
6681  auto hostExports = destGraph->exports_.view_host();
6682  Distor.doReversePostsAndWaits(hostExports,
6683  constantNumPackets,
6684  hostImports);
6685  }
6686  } else { // forward mode (the default)
6687  if (constantNumPackets == 0) { // variable number of packets per LID
6688  // Make sure that host has the latest version, since we're
6689  // using the version on host. If host has the latest
6690  // version, syncing to host does nothing.
6691  destGraph->numExportPacketsPerLID_.sync_host();
6692  destGraph->numImportPacketsPerLID_.sync_host();
6693  Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6694  destGraph->numImportPacketsPerLID_.view_host());
6695 
6696  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6697  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6698  size_t totalImportPackets = 0;
6699  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6700  totalImportPackets += numImportPacketsPerLID[i];
6701  }
6702 
6703  // Reallocation MUST go before setting the modified flag,
6704  // because it may clear out the flags.
6705  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6706  destGraph->imports_.modify_host();
6707  auto hostImports = destGraph->imports_.view_host();
6708  // This is a legacy host pack/unpack path, so use the host
6709  // version of exports_.
6710  destGraph->exports_.sync_host();
6711  auto hostExports = destGraph->exports_.view_host();
6712  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6713  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6714  Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6715  } else { // constant number of packets per LID
6716  destGraph->imports_.modify_host();
6717  auto hostImports = destGraph->imports_.view_host();
6718  // This is a legacy host pack/unpack path, so use the host
6719  // version of exports_.
6720  destGraph->exports_.sync_host();
6721  auto hostExports = destGraph->exports_.view_host();
6722  Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
6723  }
6724  }
6725  }
6726 
6727  /*********************************************************************/
6728  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6729  /*********************************************************************/
6730 
6731 #ifdef HAVE_TPETRA_MMM_TIMINGS
6732  MM = Teuchos::null;
6733  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Unpack-1"))));
6734 #endif
6735 
6736  // Backwards compatibility measure. We'll use this again below.
6737  destGraph->numImportPacketsPerLID_.sync_host();
6738  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6739  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6740  destGraph->imports_.sync_host();
6741  Teuchos::ArrayView<const packet_type> hostImports =
6742  getArrayViewFromDualView(destGraph->imports_);
6743  size_t mynnz =
6744  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
6745  numImportPacketsPerLID,
6746  constantNumPackets, INSERT,
6747  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6748  size_t N = BaseRowMap->getLocalNumElements();
6749 
6750  // Allocations
6751  ArrayRCP<size_t> CSR_rowptr(N + 1);
6752  ArrayRCP<GO> CSR_colind_GID;
6753  ArrayRCP<LO> CSR_colind_LID;
6754  CSR_colind_GID.resize(mynnz);
6755 
6756  // If LO and GO are the same, we can reuse memory when
6757  // converting the column indices from global to local indices.
6758  if (typeid(LO) == typeid(GO)) {
6759  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6760  } else {
6761  CSR_colind_LID.resize(mynnz);
6762  }
6763 
6764  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6765  // unpackAndCombine method on a "CrsArrays" object? This passing
6766  // in a huge list of arrays is icky. Can't we have a bit of an
6767  // abstraction? Implementing a concrete DistObject subclass only
6768  // takes five methods.
6769  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
6770  numImportPacketsPerLID, constantNumPackets,
6771  INSERT, NumSameIDs, PermuteToLIDs,
6772  PermuteFromLIDs, N, mynnz, MyPID,
6773  CSR_rowptr(), CSR_colind_GID(),
6774  SourcePids(), TargetPids);
6775 
6776  /**************************************************************/
6777  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6778  /**************************************************************/
6779 #ifdef HAVE_TPETRA_MMM_TIMINGS
6780  MM = Teuchos::null;
6781  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Unpack-2"))));
6782 #endif
6783  // Call an optimized version of makeColMap that avoids the
6784  // Directory lookups (since the Import object knows who owns all
6785  // the GIDs).
6786  Teuchos::Array<int> RemotePids;
6787  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6788  CSR_colind_LID(),
6789  CSR_colind_GID(),
6790  BaseDomainMap,
6791  TargetPids, RemotePids,
6792  MyColMap);
6793 
6794  /*******************************************************/
6795  /**** 4) Second communicator restriction phase ****/
6796  /*******************************************************/
6797  if (restrictComm) {
6798  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6799  MyColMap = ReducedColMap; // Reset the "my" maps
6800  }
6801 
6802  // Replace the col map
6803  destGraph->replaceColMap(MyColMap);
6804 
6805  // Short circuit if the processor is no longer in the communicator
6806  //
6807  // NOTE: Epetra modifies all "removed" processes so they
6808  // have a dummy (serial) Map that doesn't touch the original
6809  // communicator. Duplicating that here might be a good idea.
6810  if (ReducedComm.is_null()) {
6811  return;
6812  }
6813 
6814  /***************************************************/
6815  /**** 5) Sort ****/
6816  /***************************************************/
6817  if ((!reverseMode && xferAsImport != nullptr) ||
6818  (reverseMode && xferAsExport != nullptr)) {
6819  Import_Util::sortCrsEntries(CSR_rowptr(),
6820  CSR_colind_LID());
6821  } else if ((!reverseMode && xferAsExport != nullptr) ||
6822  (reverseMode && xferAsImport != nullptr)) {
6823  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6824  CSR_colind_LID());
6825  if (CSR_rowptr[N] != mynnz) {
6826  CSR_colind_LID.resize(CSR_rowptr[N]);
6827  }
6828  } else {
6829  TEUCHOS_TEST_FOR_EXCEPTION(
6830  true, std::logic_error,
6831  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6832  }
6833  /***************************************************/
6834  /**** 6) Reset the colmap and the arrays ****/
6835  /***************************************************/
6836 
6837  // Call constructor for the new graph (restricted as needed)
6838  //
6839  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6840 
6841  /***************************************************/
6842  /**** 7) Build Importer & Call ESFC ****/
6843  /***************************************************/
6844  // Pre-build the importer using the existing PIDs
6845  Teuchos::ParameterList esfc_params;
6846 #ifdef HAVE_TPETRA_MMM_TIMINGS
6847  MM = Teuchos::null;
6848  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("CreateImporter"))));
6849 #endif
6850  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
6851 #ifdef HAVE_TPETRA_MMM_TIMINGS
6852  MM = Teuchos::null;
6853  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("ESFC"))));
6854 
6855  esfc_params.set("Timer Label", prefix + std::string("TAFC"));
6856 #endif
6857  if (!params.is_null())
6858  esfc_params.set("compute global constants", params->get("compute global constants", true));
6859 
6860  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6861  MyImport, Teuchos::null, rcp(&esfc_params, false));
6862 }
6863 
// Single-Import convenience wrapper: forwards to transferAndFillComplete()
// with `importer` as the row transfer and Teuchos::null in the
// domain-transfer slot; domainMap, rangeMap and params pass through as-is.
// NOTE(review): listing lines 6865-6866 (presumably the return type, the
// qualified function name -- likely importAndFillComplete -- and the
// `destGraph` parameter) are missing from this extract; confirm against the
// original file.
6864 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6867  const import_type& importer,
6868  const Teuchos::RCP<const map_type>& domainMap,
6869  const Teuchos::RCP<const map_type>& rangeMap,
6870  const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6871  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
6872 }
6873 
// Two-Import convenience wrapper: forwards to transferAndFillComplete() with
// `rowImporter` as the row transfer and `domainImporter` as the domain
// transfer.
// The domain importer is wrapped with Teuchos::rcpFromRef, i.e. it is passed
// as an RCP built from a reference to the caller's object, so the caller's
// object must stay alive for the duration of the call.
// NOTE(review): listing lines 6875-6876 (presumably the return type, the
// qualified function name and the `destGraph` parameter) are missing from
// this extract; confirm against the original file.
6874 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6877  const import_type& rowImporter,
6878  const import_type& domainImporter,
6879  const Teuchos::RCP<const map_type>& domainMap,
6880  const Teuchos::RCP<const map_type>& rangeMap,
6881  const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6882  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
6883 }
6884 
// Single-Export convenience wrapper: forwards to transferAndFillComplete()
// with `exporter` as the row transfer and Teuchos::null in the
// domain-transfer slot; domainMap, rangeMap and params pass through as-is.
// NOTE(review): listing lines 6886-6887 (presumably the return type, the
// qualified function name -- likely exportAndFillComplete -- and the
// `destGraph` parameter) are missing from this extract; confirm against the
// original file.
6885 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6888  const export_type& exporter,
6889  const Teuchos::RCP<const map_type>& domainMap,
6890  const Teuchos::RCP<const map_type>& rangeMap,
6891  const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6892  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
6893 }
6894 
// Two-Export convenience wrapper: forwards to transferAndFillComplete() with
// `rowExporter` as the row transfer and `domainExporter` as the domain
// transfer.
// The domain exporter is wrapped with Teuchos::rcpFromRef, i.e. it is passed
// as an RCP built from a reference to the caller's object, so the caller's
// object must stay alive for the duration of the call.
// NOTE(review): listing lines 6896-6897 (presumably the return type, the
// qualified function name and the `destGraph` parameter) are missing from
// this extract; confirm against the original file.
6895 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6898  const export_type& rowExporter,
6899  const export_type& domainExporter,
6900  const Teuchos::RCP<const map_type>& domainMap,
6901  const Teuchos::RCP<const map_type>& rangeMap,
6902  const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6903  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
6904 }
6905 
// Exchanges the state of *this with `graph`, one data member at a time, via
// std::swap. Nothing is returned; both objects are modified.
// NOTE(review): listing lines 6907-6908 (presumably the return type, the
// qualified function name -- likely swap -- and the `graph` parameter) are
// missing from this extract; confirm against the original file.
// NOTE(review): only the members listed below are exchanged; when a data
// member is added to the class, check whether it must be added here too.
6906 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6909  std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
6910 
      // Row / column / range / domain Maps.
6911  std::swap(graph.rowMap_, this->rowMap_);
6912  std::swap(graph.colMap_, this->colMap_);
6913  std::swap(graph.rangeMap_, this->rangeMap_);
6914  std::swap(graph.domainMap_, this->domainMap_);
6915 
      // Communication plans.
6916  std::swap(graph.importer_, this->importer_);
6917  std::swap(graph.exporter_, this->exporter_);
6918 
      // Cached entry counts.
6919  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
6920 
6921  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
6922  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
6923 
6924  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
6925 
      // Row pointers: packed and unpacked variants, device and host copies,
      // plus the flag recording whether the two variants match.
6926  std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
6927  std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
6928 
6929  std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
6930  std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
6931  std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
6932 
6933  std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
6934 
      // Column-index storage (local unpacked, global, local packed).
6935  std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
6936  std::swap(graph.gblInds_wdv, this->gblInds_wdv);
6937  std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
6938 
6939  std::swap(graph.storageStatus_, this->storageStatus_);
6940 
      // State flags.
6941  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
6942  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
6943  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
6944  std::swap(graph.fillComplete_, this->fillComplete_);
6945  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
6946  std::swap(graph.noRedundancies_, this->noRedundancies_);
6947  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
6948  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
6949  std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
6950 
6951  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
6952 
      // Per-row bookkeeping and entries queued for rows not owned locally.
6953  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
6954  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
6955  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
6956 }
6957 
// Member-by-member comparison of *this against `graph`.
// Returns true only if every check below passes: the four Maps compare equal
// via isSameAs(), the cached counts / storage status / state flags are equal,
// nonlocals_ holds the same keys with the same values, and the per-row
// arrays, row pointers, index arrays and host local graph are element-wise
// equal. importer_ and exporter_ are intentionally not compared (see the
// comment just before the return).
// NOTE(review): listing lines 6959-6960 (presumably the `bool` return type,
// the qualified function name -- likely isIdenticalTo -- and the `graph`
// parameter) are missing from this extract; confirm against the original
// file.
6958 template <class LocalOrdinal, class GlobalOrdinal, class Node>
      // Helper: true iff m1 and m2 have the same size, every key of m1 is
      // present in m2, and for each key the two value vectors hold the same
      // elements regardless of order. v1/v2 are copies (plain `auto`), so the
      // sort does not touch the maps themselves.
      // NOTE(review): `key` is declared size_t while the comment further down
      // describes the map key as GlobalOrdinal -- confirm this conversion is
      // lossless for the GlobalOrdinal types in use.
6961  auto compare_nonlocals = [&](const nonlocals_type& m1, const nonlocals_type& m2) {
6962  bool output = true;
6963  output = m1.size() == m2.size() ? output : false;
6964  for (auto& it_m : m1) {
6965  size_t key = it_m.first;
6966  output = m2.find(key) != m2.end() ? output : false;
6967  if (output) {
6968  auto v1 = m1.find(key)->second;
6969  auto v2 = m2.find(key)->second;
6970  std::sort(v1.begin(), v1.end());
6971  std::sort(v2.begin(), v2.end());
6972 
6973  output = v1.size() == v2.size() ? output : false;
6974  for (size_t i = 0; output && i < v1.size(); i++) {
6975  output = v1[i] == v2[i] ? output : false;
6976  }
6977  }
6978  }
6979  return output;
6980  };
6981 
      // `output` accumulates the result: every check is written as
      // `cond ? output : false`, so once it turns false it stays false.
6982  bool output = true;
6983 
      // NOTE(review): the four Map pointers are dereferenced without a null
      // check -- presumably both graphs always have all four Maps set when
      // this is called; confirm for graphs that have no column Map yet.
6984  output = this->rowMap_->isSameAs(*(graph.rowMap_)) ? output : false;
6985  output = this->colMap_->isSameAs(*(graph.colMap_)) ? output : false;
6986  output = this->rangeMap_->isSameAs(*(graph.rangeMap_)) ? output : false;
6987  output = this->domainMap_->isSameAs(*(graph.domainMap_)) ? output : false;
6988 
6989  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
6990 
6991  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
6992  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
6993 
6994  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
6995 
6996  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
6997 
6998  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
6999  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7000  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7001  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7002  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7003  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7004  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7005  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7006  output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7007  output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
7008 
7009  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7010  // nonlocals_ isa std::map<GO, std::vector<GO> >
7011  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7012 
      // For each array below: compare extents first, then elements; the
      // `output &&` loop condition stops at the first mismatch.
7013  // Compare k_numAllocPerRow_ isa Kokkos::View::host_mirror_type
7014  // - since this is a host_mirror_type type, it should be in host memory already
7015  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7016  if (output && this->k_numAllocPerRow_.extent(0) > 0) {
7017  for (size_t i = 0; output && i < this->k_numAllocPerRow_.extent(0); i++)
7018  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7019  }
7020 
7021  // Compare k_numRowEntries_ isa Kokkos::View::host_mirror_type
7022  // - since this is a host_mirror_type type, it should be in host memory already
7023  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7024  if (output && this->k_numRowEntries_.extent(0) > 0) {
7025  for (size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7026  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7027  }
7028 
      // The row pointers compared here are the ones returned by
      // getRowPtrsUnpackedHost() on each graph.
7029  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7030  {
7031  auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7032  auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7033  output = rowPtrsThis.extent(0) == rowPtrsGraph.extent(0) ? output : false;
7034  for (size_t i = 0; output && i < rowPtrsThis.extent(0); i++)
7035  output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7036  }
7037 
7038  // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7039  output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7040  if (output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7041  auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7042  auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7043  for (size_t i = 0; output && i < indThis.extent(0); i++)
7044  output = indThis(i) == indGraph(i) ? output : false;
7045  }
7046 
7047  // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7048  output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7049  if (output && this->gblInds_wdv.extent(0) > 0) {
7050  auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7051  auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7052  for (size_t i = 0; output && i < indtThis.extent(0); i++)
7053  output = indtThis(i) == indtGraph(i) ? output : false;
7054  }
7055 
7056  // Check lclGraph_ isa
7057  // KokkosSparse::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7058  // KokkosSparse::StaticCrsGraph has 3 data members in it:
7059  // Kokkos::View<size_type*, ...> row_map
7060  // (local_graph_device_type::row_map_type)
7061  // Kokkos::View<data_type*, ...> entries
7062  // (local_graph_device_type::entries_type)
7063  // Kokkos::View<size_type*, ...> row_block_offsets
7064  // (local_graph_device_type::row_block_type)
7065  // There is currently no KokkosSparse::StaticCrsGraph comparison function
7066  // that's built-in, so we will just compare
7067  // the three data items here. This can be replaced if Kokkos ever
7068  // puts in its own comparison routine.
7069  local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7070  local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7071 
      // Local graph, part 1 of 3: row_map.
7072  output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7073  ? output
7074  : false;
7075  if (output && thisLclGraph.row_map.extent(0) > 0) {
7076  auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7077  auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7078  for (size_t i = 0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7079  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7080  ? output
7081  : false;
7082  }
7083 
      // Local graph, part 2 of 3: entries.
7084  output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7085  ? output
7086  : false;
7087  if (output && thisLclGraph.entries.extent(0) > 0) {
7088  auto lclGraph_entries_host_this = thisLclGraph.entries;
7089  auto lclGraph_entries_host_graph = graphLclGraph.entries;
7090  for (size_t i = 0; output && i < lclGraph_entries_host_this.extent(0); i++)
7091  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7092  ? output
7093  : false;
7094  }
7095 
      // Local graph, part 3 of 3: row_block_offsets.
7096  output =
7097  thisLclGraph.row_block_offsets.extent(0) ==
7098  graphLclGraph.row_block_offsets.extent(0)
7099  ? output
7100  : false;
7101  if (output && thisLclGraph.row_block_offsets.extent(0) > 0) {
7102  auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7103  auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7104  for (size_t i = 0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7105  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7106  ? output
7107  : false;
7108  }
7109 
7110  // For Importer and Exporter, we don't need to explicitly check them since
7111  // they will be consistent with the maps.
7112  // Note: importer_ isa Teuchos::RCP<const import_type>
7113  // exporter_ isa Teuchos::RCP<const export_type>
7114 
7115  return output;
7116 }
7117 
7118 } // namespace Tpetra
7119 
7120 //
7121 // Explicit instantiation macros
7122 //
7123 // Must be expanded from within the Tpetra namespace!
7124 //
7125 
// Expands to a `template <>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, in the overload
// that takes a single Import object.
//
// Macro arguments: LO, GO, NODE -- the template arguments of CrsGraph.
// Declared parameters: sourceGraph, importer, domainMap, rangeMap, params.
// The Import and Map types are spelled through CrsGraph's own
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// The expansion ends with a semicolon, so call sites need not add one.
// Must be expanded from within the Tpetra namespace.
//
// NOTE(review): `template <>` is explicit-specialization syntax; an explicit
// instantiation would be spelled `template` without `<>`. The macro name says
// "INSTANT" -- confirm the specialization form is intended.
7126 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7127  template <> \
7128  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7129  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7130  const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7131  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7132  CrsGraph<LO, GO, NODE>::node_type>& importer, \
7133  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7134  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7135  CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7136  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7137  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7138  CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7139  const Teuchos::RCP<Teuchos::ParameterList>& params);
7140 
// Expands to a `template <>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, in the overload
// that takes two Import objects (rowImporter and domainImporter).
//
// Macro arguments: LO, GO, NODE -- the template arguments of CrsGraph.
// Declared parameters: sourceGraph, rowImporter, domainImporter, domainMap,
// rangeMap, params.
// The expansion ends with a semicolon, so call sites need not add one.
// Must be expanded from within the Tpetra namespace.
//
// NOTE(review): `template <>` is explicit-specialization syntax rather than
// explicit-instantiation syntax -- confirm this is intended.
7141 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7142  template <> \
7143  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7144  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7145  const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7146  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7147  CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
7148  const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7149  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7150  CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
7151  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7152  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7153  CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7154  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7155  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7156  CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7157  const Teuchos::RCP<Teuchos::ParameterList>& params);
7158 
// Expands to a `template <>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, in the overload
// that takes a single Export object.
//
// Macro arguments: LO, GO, NODE -- the template arguments of CrsGraph.
// Declared parameters: sourceGraph, exporter, domainMap, rangeMap, params.
// The expansion ends with a semicolon, so call sites need not add one.
// Must be expanded from within the Tpetra namespace.
//
// NOTE(review): `template <>` is explicit-specialization syntax rather than
// explicit-instantiation syntax -- confirm this is intended.
7159 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7160  template <> \
7161  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7162  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7163  const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7164  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7165  CrsGraph<LO, GO, NODE>::node_type>& exporter, \
7166  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7167  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7168  CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7169  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7170  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7171  CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7172  const Teuchos::RCP<Teuchos::ParameterList>& params);
7173 
// Expands to a `template <>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, in the overload
// that takes two Export objects (rowExporter and domainExporter).
//
// Macro arguments: LO, GO, NODE -- the template arguments of CrsGraph.
// Declared parameters: sourceGraph, rowExporter, domainExporter, domainMap,
// rangeMap, params.
// The expansion ends with a semicolon, so call sites need not add one.
// Must be expanded from within the Tpetra namespace.
//
// NOTE(review): `template <>` is explicit-specialization syntax rather than
// explicit-instantiation syntax -- confirm this is intended.
7174 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7175  template <> \
7176  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7177  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7178  const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7179  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7180  CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
7181  const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7182  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7183  CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
7184  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7185  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7186  CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7187  const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7188  CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7189  CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7190  const Teuchos::RCP<Teuchos::ParameterList>& params);
7191 
// Umbrella macro for one (LO, GO, NODE) combination.
// It first explicitly instantiates the class CrsGraph<LO, GO, NODE>
// (`template class ...;`), then expands the four helper macros
//   TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT,
//   TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT,
//   TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO,
//   TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO
// with the same arguments. Each helper supplies its own trailing semicolon,
// which is why no separators appear between them here.
// Must be expanded from within the Tpetra namespace.
7192 #define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
7193  template class CrsGraph<LO, GO, NODE>; \
7194  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7195  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7196  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7197  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)
7198 
7199 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
An abstract interface for graphs accessed by rows.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Declaration of a function that prints strings from each process.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT >> &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT >> &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
const row_ptrs_host_view_type & getRowPtrsPackedHost() const
Get the packed row pointers on host. Lazily make a copy from device.
static bool debug()
Whether Tpetra is in debug mode.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node >> &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
std::string description() const override
Return a one-line human-readable description of this object.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
size_t global_size_t
Global size_t object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class' Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void checkInternalState() const
Throw an exception if the internal state is not consistent.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node >> &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
Utility functions for packing and unpacking sparse matrix entries.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Abstract base class for objects that can be the source of an Import or Export operation.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
const row_ptrs_device_view_type & getRowPtrsPackedDevice() const
Get the packed row pointers on device.
A parallel distribution of indices over processes.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
const row_ptrs_device_view_type & getRowPtrsUnpackedDevice() const
Get the unpacked row pointers on device.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
typename Node::device_type device_type
This class' Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Stand-alone utility functions and macros.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
Kokkos::View< const size_t *, device_type >::host_mirror_type k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:69
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.