Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPH_DEF_HPP
11 #define TPETRA_CRSGRAPH_DEF_HPP
12 
15 
20 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
21 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
22 #include "Tpetra_Details_makeColMap.hpp"
26 #include "Tpetra_Distributor.hpp"
27 #include "Teuchos_SerialDenseMatrix.hpp"
28 #include "Tpetra_Vector.hpp"
29 #include "Tpetra_Import_Util.hpp"
30 #include "Tpetra_Import_Util2.hpp"
31 #include "Tpetra_Details_packCrsGraph.hpp"
32 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
33 #include "Tpetra_Details_CrsPadding.hpp"
34 #include "Tpetra_Util.hpp"
35 #include <algorithm>
36 #include <limits>
37 #include <map>
38 #include <sstream>
39 #include <string>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
44 namespace Tpetra {
45  namespace Details {
46  namespace Impl {
47 
48  template<class MapIter>
49  void
50  verbosePrintMap(std::ostream& out,
51  MapIter beg,
52  MapIter end,
53  const size_t numEnt,
54  const char mapName[])
55  {
56  using ::Tpetra::Details::Behavior;
58 
59  out << mapName << ": {";
60  const size_t maxNumToPrint =
62  if (maxNumToPrint == 0) {
63  if (numEnt != 0) {
64  out << "...";
65  }
66  }
67  else {
68  const size_t numToPrint = numEnt > maxNumToPrint ?
69  maxNumToPrint : numEnt;
70  size_t count = 0;
71  for (MapIter it = beg; it != end; ++it) {
72  out << "(" << (*it).first << ", ";
73  verbosePrintArray(out, (*it).second, "gblColInds",
74  maxNumToPrint);
75  out << ")";
76  if (count + size_t(1) < numToPrint) {
77  out << ", ";
78  }
79  ++count;
80  }
81  if (count < numEnt) {
82  out << ", ...";
83  }
84  }
85  out << "}";
86  }
87 
88  template<class LO, class GO, class Node>
89  Teuchos::ArrayView<GO>
90  getRowGraphGlobalRow(
91  std::vector<GO>& gblColIndsStorage,
92  const RowGraph<LO, GO, Node>& graph,
93  const GO gblRowInd)
94  {
95  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
96  if (gblColIndsStorage.size() < origNumEnt) {
97  gblColIndsStorage.resize(origNumEnt);
98  }
99  typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
100  origNumEnt);
101  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
102  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
103  return retval;
104  }
105 
106  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
107  class ConvertColumnIndicesFromGlobalToLocal {
108  public:
109  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
110  const ::Kokkos::View<const GO*, DT>& gblColInds,
111  const ::Kokkos::View<const OffsetType*, DT>& ptr,
112  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
113  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
114  lclColInds_ (lclColInds),
115  gblColInds_ (gblColInds),
116  ptr_ (ptr),
117  lclColMap_ (lclColMap),
118  numRowEnt_ (numRowEnt)
119  {}
120 
121  KOKKOS_FUNCTION void
122  operator () (const LO& lclRow, OffsetType& curNumBad) const
123  {
124  const OffsetType offset = ptr_(lclRow);
125  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
126  // of entries in a row to LO, as long as the row doesn't have
127  // too many duplicate entries.
128  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
129  for (LO j = 0; j < numEnt; ++j) {
130  const GO gid = gblColInds_(offset + j);
131  const LO lid = lclColMap_.getLocalElement (gid);
132  lclColInds_(offset + j) = lid;
133  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
134  ++curNumBad;
135  }
136  }
137  }
138 
139  static OffsetType
140  run (const ::Kokkos::View<LO*, DT>& lclColInds,
141  const ::Kokkos::View<const GO*, DT>& gblColInds,
142  const ::Kokkos::View<const OffsetType*, DT>& ptr,
143  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
144  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
145  {
146  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
147  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
148 
149  const LO lclNumRows = ptr.extent (0) == 0 ?
150  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
151  OffsetType numBad = 0;
152  // Count of "bad" column indices is a reduction over rows.
153  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
154  functor_type (lclColInds, gblColInds, ptr,
155  lclColMap, numRowEnt),
156  numBad);
157  return numBad;
158  }
159 
160  private:
161  ::Kokkos::View<LO*, DT> lclColInds_;
162  ::Kokkos::View<const GO*, DT> gblColInds_;
163  ::Kokkos::View<const OffsetType*, DT> ptr_;
165  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
166  };
167 
168  } // namespace Impl
169 
184  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
185  OffsetType
186  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
187  const Kokkos::View<const GO*, DT>& gblColInds,
188  const Kokkos::View<const OffsetType*, DT>& ptr,
189  const LocalMap<LO, GO, DT>& lclColMap,
190  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
191  {
192  using Impl::ConvertColumnIndicesFromGlobalToLocal;
193  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
194  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
195  }
196 
197  template<class ViewType, class LO>
198  class MaxDifference {
199  public:
200  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
201 
202  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
203  dst = 0;
204  }
205 
206  KOKKOS_INLINE_FUNCTION void
207  join (LO& dst, const LO& src) const
208  {
209  dst = (src > dst) ? src : dst;
210  }
211 
212  KOKKOS_INLINE_FUNCTION void
213  operator () (const LO lclRow, LO& maxNumEnt) const
214  {
215  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
216  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
217  }
218  private:
219  typename ViewType::const_type ptr_;
220  };
221 
222  template<class ViewType, class LO>
223  typename ViewType::non_const_value_type
224  maxDifference (const char kernelLabel[],
225  const ViewType& ptr,
226  const LO lclNumRows)
227  {
228  if (lclNumRows == 0) {
229  // mfh 07 May 2018: Weirdly, I need this special case,
230  // otherwise I get the wrong answer.
231  return static_cast<LO> (0);
232  }
233  else {
234  using execution_space = typename ViewType::execution_space;
235  using range_type = Kokkos::RangePolicy<execution_space, LO>;
236  LO theMaxNumEnt {0};
237  Kokkos::parallel_reduce (kernelLabel,
238  range_type (0, lclNumRows),
239  MaxDifference<ViewType, LO> (ptr),
240  theMaxNumEnt);
241  return theMaxNumEnt;
242  }
243  }
244 
245  } // namespace Details
246 
247  template <class LocalOrdinal, class GlobalOrdinal, class Node>
248  bool
249  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
250  getDebug() {
251  return Details::Behavior::debug("CrsGraph");
252  }
253 
254  template <class LocalOrdinal, class GlobalOrdinal, class Node>
255  bool
256  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
257  getVerbose() {
258  return Details::Behavior::verbose("CrsGraph");
259  }
260 
261  template <class LocalOrdinal, class GlobalOrdinal, class Node>
262  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
263  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
264  const size_t maxNumEntriesPerRow,
265  const Teuchos::RCP<Teuchos::ParameterList>& params) :
266  dist_object_type (rowMap)
267  , rowMap_ (rowMap)
268  , numAllocForAllRows_ (maxNumEntriesPerRow)
269  {
270  const char tfecfFuncName[] =
271  "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
272  staticAssertions ();
273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
275  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
276  "a valid size_t value, which in this case means it must not be "
277  "Teuchos::OrdinalTraits<size_t>::invalid().");
278  resumeFill (params);
280  }
281 
282  template <class LocalOrdinal, class GlobalOrdinal, class Node>
284  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
285  const Teuchos::RCP<const map_type>& colMap,
286  const size_t maxNumEntriesPerRow,
287  const Teuchos::RCP<Teuchos::ParameterList>& params) :
288  dist_object_type (rowMap)
289  , rowMap_ (rowMap)
290  , colMap_ (colMap)
291  , numAllocForAllRows_ (maxNumEntriesPerRow)
292  {
293  const char tfecfFuncName[] =
294  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
295  staticAssertions ();
296  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
297  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
298  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
299  "a valid size_t value, which in this case means it must not be "
300  "Teuchos::OrdinalTraits<size_t>::invalid().");
301  resumeFill (params);
303  }
304 
305 
306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
308  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
309  const Teuchos::ArrayView<const size_t>& numEntPerRow,
310  const Teuchos::RCP<Teuchos::ParameterList>& params) :
311  dist_object_type (rowMap)
312  , rowMap_ (rowMap)
313  , numAllocForAllRows_ (0)
314  {
315  const char tfecfFuncName[] =
316  "CrsGraph(rowMap,numEntPerRow,params): ";
317  staticAssertions ();
318 
319  const size_t lclNumRows = rowMap.is_null () ?
320  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
322  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
323  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
324  << " != the local number of rows " << lclNumRows << " as specified by "
325  "the input row Map.");
326 
327  if (debug_) {
328  for (size_t r = 0; r < lclNumRows; ++r) {
329  const size_t curRowCount = numEntPerRow[r];
330  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
331  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
332  std::invalid_argument, "numEntPerRow(" << r << ") "
333  "specifies an invalid number of entries "
334  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
335  }
336  }
337 
338  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
339  // The latter is a const View, so we have to copy into a nonconst
340  // View first, then assign.
341  typedef decltype (k_numAllocPerRow_) out_view_type;
342  typedef typename out_view_type::non_const_type nc_view_type;
343  typedef Kokkos::View<const size_t*,
344  typename nc_view_type::array_layout,
345  Kokkos::HostSpace,
346  Kokkos::MemoryUnmanaged> in_view_type;
347  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
348  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
349  lclNumRows);
350  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
351  using exec_space = typename nc_view_type::execution_space;
352  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
353  k_numAllocPerRow_ = numAllocPerRowOut;
354 
355  resumeFill (params);
357  }
358 
359 
360 
361  template <class LocalOrdinal, class GlobalOrdinal, class Node>
363  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
364  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
365  const Teuchos::RCP<Teuchos::ParameterList>& params) :
366  dist_object_type (rowMap)
367  , rowMap_ (rowMap)
368  , k_numAllocPerRow_ (numEntPerRow.h_view)
369  , numAllocForAllRows_ (0)
370  {
371  const char tfecfFuncName[] =
372  "CrsGraph(rowMap,numEntPerRow,params): ";
373  staticAssertions ();
374 
375  const size_t lclNumRows = rowMap.is_null () ?
376  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
377  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
378  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
379  std::invalid_argument, "numEntPerRow has length " <<
380  numEntPerRow.extent (0) << " != the local number of rows " <<
381  lclNumRows << " as specified by " "the input row Map.");
382 
383  if (debug_) {
384  for (size_t r = 0; r < lclNumRows; ++r) {
385  const size_t curRowCount = numEntPerRow.h_view(r);
386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
387  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
388  std::invalid_argument, "numEntPerRow(" << r << ") "
389  "specifies an invalid number of entries "
390  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
391  }
392  }
393 
394  resumeFill (params);
396  }
397 
398 
399  template <class LocalOrdinal, class GlobalOrdinal, class Node>
401  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
402  const Teuchos::RCP<const map_type>& colMap,
403  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
404  const Teuchos::RCP<Teuchos::ParameterList>& params) :
405  dist_object_type (rowMap)
406  , rowMap_ (rowMap)
407  , colMap_ (colMap)
408  , k_numAllocPerRow_ (numEntPerRow.h_view)
409  , numAllocForAllRows_ (0)
410  {
411  const char tfecfFuncName[] =
412  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
413  staticAssertions ();
414 
415  const size_t lclNumRows = rowMap.is_null () ?
416  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
418  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
419  std::invalid_argument, "numEntPerRow has length " <<
420  numEntPerRow.extent (0) << " != the local number of rows " <<
421  lclNumRows << " as specified by " "the input row Map.");
422 
423  if (debug_) {
424  for (size_t r = 0; r < lclNumRows; ++r) {
425  const size_t curRowCount = numEntPerRow.h_view(r);
426  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
427  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
428  std::invalid_argument, "numEntPerRow(" << r << ") "
429  "specifies an invalid number of entries "
430  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
431  }
432  }
433 
434  resumeFill (params);
436  }
437 
438 
439  template <class LocalOrdinal, class GlobalOrdinal, class Node>
441  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
442  const Teuchos::RCP<const map_type>& colMap,
443  const Teuchos::ArrayView<const size_t>& numEntPerRow,
444  const Teuchos::RCP<Teuchos::ParameterList>& params) :
445  dist_object_type (rowMap)
446  , rowMap_ (rowMap)
447  , colMap_ (colMap)
448  , numAllocForAllRows_ (0)
449  {
450  const char tfecfFuncName[] =
451  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
452  staticAssertions ();
453 
454  const size_t lclNumRows = rowMap.is_null () ?
455  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
457  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
458  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
459  << " != the local number of rows " << lclNumRows << " as specified by "
460  "the input row Map.");
461 
462  if (debug_) {
463  for (size_t r = 0; r < lclNumRows; ++r) {
464  const size_t curRowCount = numEntPerRow[r];
465  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
466  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
467  std::invalid_argument, "numEntPerRow(" << r << ") "
468  "specifies an invalid number of entries "
469  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
470  }
471  }
472 
473  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
474  // The latter is a const View, so we have to copy into a nonconst
475  // View first, then assign.
476  typedef decltype (k_numAllocPerRow_) out_view_type;
477  typedef typename out_view_type::non_const_type nc_view_type;
478  typedef Kokkos::View<const size_t*,
479  typename nc_view_type::array_layout,
480  Kokkos::HostSpace,
481  Kokkos::MemoryUnmanaged> in_view_type;
482  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
483  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
484  lclNumRows);
485  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
486  using exec_space = typename nc_view_type::execution_space;
487  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
488  k_numAllocPerRow_ = numAllocPerRowOut;
489 
490  resumeFill (params);
492  }
493 
494 
495  template <class LocalOrdinal, class GlobalOrdinal, class Node>
498  const Teuchos::RCP<const map_type>& rowMap,
499  const Teuchos::RCP<Teuchos::ParameterList>& params) :
500  dist_object_type (rowMap)
501  , rowMap_(rowMap)
502  , colMap_(originalGraph.colMap_)
503  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
504  , storageStatus_(originalGraph.storageStatus_)
505  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
506  , indicesAreLocal_(originalGraph.indicesAreLocal_)
507  , indicesAreSorted_(originalGraph.indicesAreSorted_)
508  {
509  staticAssertions();
510 
511  int numRows = rowMap->getLocalNumElements();
512  size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
513  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
514 
515 
516  this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
517  this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
518 
519  if (indicesAreLocal_) {
520  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
521  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
522  }
523  else {
524  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
525  }
526 
528  }
529 
530  template <class LocalOrdinal, class GlobalOrdinal, class Node>
532  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
533  const Teuchos::RCP<const map_type>& colMap,
534  const typename local_graph_device_type::row_map_type& rowPointers,
535  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
536  const Teuchos::RCP<Teuchos::ParameterList>& params) :
537  dist_object_type (rowMap)
538  , rowMap_(rowMap)
539  , colMap_(colMap)
540  , numAllocForAllRows_(0)
541  , storageStatus_(Details::STORAGE_1D_PACKED)
542  , indicesAreAllocated_(true)
543  , indicesAreLocal_(true)
544  {
545  staticAssertions ();
546  if (! params.is_null() && params->isParameter("sorted") &&
547  ! params->get<bool>("sorted")) {
548  indicesAreSorted_ = false;
549  }
550  else {
551  indicesAreSorted_ = true;
552  }
553  setAllIndices (rowPointers, columnIndices);
555  }
556 
557  template <class LocalOrdinal, class GlobalOrdinal, class Node>
559  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
560  const Teuchos::RCP<const map_type>& colMap,
561  const Teuchos::ArrayRCP<size_t>& rowPointers,
562  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
563  const Teuchos::RCP<Teuchos::ParameterList>& params) :
564  dist_object_type (rowMap)
565  , rowMap_ (rowMap)
566  , colMap_ (colMap)
567  , numAllocForAllRows_ (0)
568  , storageStatus_ (Details::STORAGE_1D_PACKED)
569  , indicesAreAllocated_ (true)
570  , indicesAreLocal_ (true)
571  {
572  staticAssertions ();
573  if (! params.is_null() && params->isParameter("sorted") &&
574  ! params->get<bool>("sorted")) {
575  indicesAreSorted_ = false;
576  }
577  else {
578  indicesAreSorted_ = true;
579  }
580  setAllIndices (rowPointers, columnIndices);
582  }
583 
584  template <class LocalOrdinal, class GlobalOrdinal, class Node>
586  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
587  const Teuchos::RCP<const map_type>& colMap,
588  const local_graph_device_type& k_local_graph_,
589  const Teuchos::RCP<Teuchos::ParameterList>& params)
590  : CrsGraph (k_local_graph_,
591  rowMap,
592  colMap,
593  Teuchos::null,
594  Teuchos::null,
595  params)
596  {}
597 
598  template <class LocalOrdinal, class GlobalOrdinal, class Node>
600  CrsGraph (const local_graph_device_type& k_local_graph_,
601  const Teuchos::RCP<const map_type>& rowMap,
602  const Teuchos::RCP<const map_type>& colMap,
603  const Teuchos::RCP<const map_type>& domainMap,
604  const Teuchos::RCP<const map_type>& rangeMap,
605  const Teuchos::RCP<Teuchos::ParameterList>& params)
606  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
607  , rowMap_ (rowMap)
608  , colMap_ (colMap)
609  , numAllocForAllRows_ (0)
610  , storageStatus_ (Details::STORAGE_1D_PACKED)
611  , indicesAreAllocated_ (true)
612  , indicesAreLocal_ (true)
613  {
614  staticAssertions();
615  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
616 
617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
618  colMap.is_null (), std::runtime_error,
619  ": The input column Map must be nonnull.");
620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621  k_local_graph_.numRows () != rowMap->getLocalNumElements (),
622  std::runtime_error,
623  ": The input row Map and the input local graph need to have the same "
624  "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
625  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
626  << " row(s).");
627 
628  // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
629  // rowMap_->getLocalNumElements(), but it doesn't have to.
630  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631  // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
632  // ": The input row Map and the input local graph need to have the same "
633  // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
634  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
636  lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
637  ": cannot have 1D data structures allocated.");
638 
639  if(! params.is_null() && params->isParameter("sorted") &&
640  ! params->get<bool>("sorted")) {
641  indicesAreSorted_ = false;
642  }
643  else {
644  indicesAreSorted_ = true;
645  }
646 
647  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
648  rangeMap .is_null() ? rowMap_ : rangeMap);
649  Teuchos::Array<int> remotePIDs (0); // unused output argument
650  this->makeImportExport (remotePIDs, false);
651 
652  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
654  this->setRowPtrs(k_local_graph_.row_map);
655 
656  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
657 
658  const bool callComputeGlobalConstants = params.get () == nullptr ||
659  params->get ("compute global constants", true);
660 
661  if (callComputeGlobalConstants) {
662  this->computeGlobalConstants ();
663  }
664  this->fillComplete_ = true;
665  this->checkInternalState ();
666  }
667 
668  template <class LocalOrdinal, class GlobalOrdinal, class Node>
671  const Teuchos::RCP<const map_type>& rowMap,
672  const Teuchos::RCP<const map_type>& colMap,
673  const Teuchos::RCP<const map_type>& domainMap,
674  const Teuchos::RCP<const map_type>& rangeMap,
675  const Teuchos::RCP<const import_type>& importer,
676  const Teuchos::RCP<const export_type>& exporter,
677  const Teuchos::RCP<Teuchos::ParameterList>& params) :
678  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
679  rowMap_ (rowMap),
680  colMap_ (colMap),
681  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
682  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
683  importer_ (importer),
684  exporter_ (exporter),
685  numAllocForAllRows_ (0),
686  storageStatus_ (Details::STORAGE_1D_PACKED),
687  indicesAreAllocated_ (true),
688  indicesAreLocal_ (true)
689  {
690  staticAssertions();
691  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
692  "Map,Map,Map,Map,Import,Export,params): ";
693 
694  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
695  (colMap.is_null (), std::runtime_error,
696  "The input column Map must be nonnull.");
697 
698  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
700  setRowPtrs(lclGraph.row_map);
701 
702  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
703 
704  if (! params.is_null() && params->isParameter("sorted") &&
705  ! params->get<bool>("sorted")) {
706  indicesAreSorted_ = false;
707  }
708  else {
709  indicesAreSorted_ = true;
710  }
711 
712  const bool callComputeGlobalConstants =
713  params.get () == nullptr ||
714  params->get ("compute global constants", true);
715  if (callComputeGlobalConstants) {
716  this->computeGlobalConstants ();
717  }
718  fillComplete_ = true;
720  }
721 
722  template <class LocalOrdinal, class GlobalOrdinal, class Node>
724  CrsGraph (const row_ptrs_device_view_type& rowPointers,
725  const local_inds_wdv_type& columnIndices,
726  const Teuchos::RCP<const map_type>& rowMap,
727  const Teuchos::RCP<const map_type>& colMap,
728  const Teuchos::RCP<const map_type>& domainMap,
729  const Teuchos::RCP<const map_type>& rangeMap,
730  const Teuchos::RCP<const import_type>& importer,
731  const Teuchos::RCP<const export_type>& exporter,
732  const Teuchos::RCP<Teuchos::ParameterList>& params) :
733  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
734  rowMap_ (rowMap),
735  colMap_ (colMap),
736  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
737  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
738  importer_ (importer),
739  exporter_ (exporter),
740  numAllocForAllRows_ (0),
741  storageStatus_ (Details::STORAGE_1D_PACKED),
742  indicesAreAllocated_ (true),
743  indicesAreLocal_ (true)
744  {
745  staticAssertions();
746  const char tfecfFuncName[] = "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
747  "Map,Map,Map,Map,Import,Export,params): ";
748 
749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
750  (colMap.is_null (), std::runtime_error,
751  "The input column Map must be nonnull.");
752 
753  lclIndsPacked_wdv = columnIndices;
755  setRowPtrs(rowPointers);
756 
757  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
758 
759  if (! params.is_null() && params->isParameter("sorted") &&
760  ! params->get<bool>("sorted")) {
761  indicesAreSorted_ = false;
762  }
763  else {
764  indicesAreSorted_ = true;
765  }
766 
767  const bool callComputeGlobalConstants =
768  params.get () == nullptr ||
769  params->get ("compute global constants", true);
770  if (callComputeGlobalConstants) {
771  this->computeGlobalConstants ();
772  }
773  fillComplete_ = true;
775  }
776 
777  template <class LocalOrdinal, class GlobalOrdinal, class Node>
778  Teuchos::RCP<const Teuchos::ParameterList>
781  {
782  using Teuchos::RCP;
783  using Teuchos::ParameterList;
784  using Teuchos::parameterList;
785 
786  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
787 
788  // Make a sublist for the Import.
789  RCP<ParameterList> importSublist = parameterList ("Import");
790 
791  // FIXME (mfh 02 Apr 2012) We should really have the Import and
792  // Export objects fill in these lists. However, we don't want to
793  // create an Import or Export unless we need them. For now, we
794  // know that the Import and Export just pass the list directly to
795  // their Distributor, so we can create a Distributor here
796  // (Distributor's constructor is a lightweight operation) and have
797  // it fill in the list.
798 
799  // Fill in Distributor default parameters by creating a
800  // Distributor and asking it to do the work.
801  Distributor distributor (rowMap_->getComm (), importSublist);
802  params->set ("Import", *importSublist, "How the Import performs communication.");
803 
804  // Make a sublist for the Export. For now, it's a clone of the
805  // Import sublist. It's not a shallow copy, though, since we
806  // might like the Import to do communication differently than the
807  // Export.
808  params->set ("Export", *importSublist, "How the Export performs communication.");
809 
810  return params;
811  }
812 
813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  void
816  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
817  {
818  Teuchos::RCP<const Teuchos::ParameterList> validParams =
819  getValidParameters ();
820  params->validateParametersAndSetDefaults (*validParams);
821  this->setMyParamList (params);
822  }
823 
824  template <class LocalOrdinal, class GlobalOrdinal, class Node>
828  {
829  return rowMap_->getGlobalNumElements ();
830  }
831 
832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
836  {
837  const char tfecfFuncName[] = "getGlobalNumCols: ";
838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
839  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
840  "The graph does not have a domain Map. You may not call this method in "
841  "that case.");
842  return getDomainMap ()->getGlobalNumElements ();
843  }
844 
845 
846  template <class LocalOrdinal, class GlobalOrdinal, class Node>
847  size_t
850  {
851  return this->rowMap_.is_null () ?
852  static_cast<size_t> (0) :
853  this->rowMap_->getLocalNumElements ();
854  }
855 
856 
857  template <class LocalOrdinal, class GlobalOrdinal, class Node>
858  size_t
861  {
862  const char tfecfFuncName[] = "getLocalNumCols: ";
863  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
864  ! hasColMap (), std::runtime_error,
865  "The graph does not have a column Map. You may not call this method "
866  "unless the graph has a column Map. This requires either that a custom "
867  "column Map was given to the constructor, or that fillComplete() has "
868  "been called.");
869  return colMap_.is_null () ? static_cast<size_t> (0) :
870  colMap_->getLocalNumElements ();
871  }
872 
873 
874 
875  template <class LocalOrdinal, class GlobalOrdinal, class Node>
876  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
878  getRowMap () const
879  {
880  return rowMap_;
881  }
882 
883  template <class LocalOrdinal, class GlobalOrdinal, class Node>
884  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
886  getColMap () const
887  {
888  return colMap_;
889  }
890 
891  template <class LocalOrdinal, class GlobalOrdinal, class Node>
892  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
895  {
896  return domainMap_;
897  }
898 
899  template <class LocalOrdinal, class GlobalOrdinal, class Node>
900  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
902  getRangeMap () const
903  {
904  return rangeMap_;
905  }
906 
907  template <class LocalOrdinal, class GlobalOrdinal, class Node>
908  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
910  getImporter () const
911  {
912  return importer_;
913  }
914 
915  template <class LocalOrdinal, class GlobalOrdinal, class Node>
916  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
918  getExporter () const
919  {
920  return exporter_;
921  }
922 
923  template <class LocalOrdinal, class GlobalOrdinal, class Node>
924  bool
926  hasColMap () const
927  {
928  return ! colMap_.is_null ();
929  }
930 
931  template <class LocalOrdinal, class GlobalOrdinal, class Node>
932  bool
935  {
936  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
937  // getLocalNumRows() is zero?
938 
939  const bool isOpt = indicesAreAllocated_ &&
940  k_numRowEntries_.extent (0) == 0 &&
941  getLocalNumRows () > 0;
942 
943  return isOpt;
944  }
945 
946 
947  template <class LocalOrdinal, class GlobalOrdinal, class Node>
951  {
952  const char tfecfFuncName[] = "getGlobalNumEntries: ";
953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
954  (! this->haveGlobalConstants_, std::logic_error,
955  "The graph does not have global constants computed, "
956  "but the user has requested them.");
957 
958  return globalNumEntries_;
959  }
960 
961 
962  template <class LocalOrdinal, class GlobalOrdinal, class Node>
963  size_t
966  {
967  const char tfecfFuncName[] = "getLocalNumEntries: ";
968  typedef LocalOrdinal LO;
969 
970  if (this->indicesAreAllocated_) {
971  const LO lclNumRows = this->getLocalNumRows ();
972  if (lclNumRows == 0) {
973  return static_cast<size_t> (0);
974  }
975  else {
976  // Avoid the "*this capture" issue by creating a local Kokkos::View.
977  auto numEntPerRow = this->k_numRowEntries_;
978  const LO numNumEntPerRow = numEntPerRow.extent (0);
979  if (numNumEntPerRow == 0) {
980  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
981  static_cast<LO> (lclNumRows + 1)) {
982  return static_cast<size_t> (0);
983  }
984  else {
985  // indices are allocated and k_numRowEntries_ is not allocated,
986  // so we have packed storage and the length of lclIndsPacked_wdv
987  // must be the number of local entries.
988  if(debug_) {
989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
990  (this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
991  "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
992  }
993  return lclIndsPacked_wdv.extent(0);
994  }
995  }
996  else { // k_numRowEntries_ is populated
997  // k_numRowEntries_ is actually be a host View, so we run
998  // the sum in its native execution space. This also means
999  // that we can use explicit capture (which could perhaps
1000  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1001  // any CUDA build issues with trying to run a __device__ -
1002  // only function on host.
1003  typedef typename num_row_entries_type::execution_space
1004  host_exec_space;
1005  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1006 
1007  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1008  lclNumRows :
1009  numNumEntPerRow;
1010  size_t nodeNumEnt = 0;
1011  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1012  range_type (0, upperLoopBound),
1013  [=] (const LO& k, size_t& lclSum) {
1014  lclSum += numEntPerRow(k);
1015  }, nodeNumEnt);
1016  return nodeNumEnt;
1017  }
1018  }
1019  }
1020  else { // nothing allocated on this process, so no entries
1021  return static_cast<size_t> (0);
1022  }
1023  }
1024 
1025  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1029  {
1030  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1031  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1032  (! this->haveGlobalConstants_, std::logic_error,
1033  "The graph does not have global constants computed, "
1034  "but the user has requested them.");
1035 
1036  return globalMaxNumRowEntries_;
1037  }
1038 
1039  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040  size_t
1043  {
1044  return nodeMaxNumRowEntries_;
1045  }
1046 
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  bool
1051  {
1052  return fillComplete_;
1053  }
1054 
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  bool
1059  {
1060  return ! fillComplete_;
1061  }
1062 
1063 
1064  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1065  bool
1068  {
1069  return indicesAreLocal_;
1070  }
1071 
1072  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1073  bool
1076  {
1077  return indicesAreGlobal_;
1078  }
1079 
1080  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1081  size_t
1084  {
1085  typedef LocalOrdinal LO;
1086 
1087  if (this->indicesAreAllocated_) {
1088  const LO lclNumRows = this->getLocalNumRows ();
1089  if (lclNumRows == 0) {
1090  return static_cast<size_t> (0);
1091  }
1092  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1093  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
1094  static_cast<LO> (lclNumRows + 1)) {
1095  return static_cast<size_t> (0);
1096  }
1097  else {
1098  if(this->isLocallyIndexed())
1099  return lclIndsPacked_wdv.extent(0);
1100  else
1101  return gblInds_wdv.extent(0);
1102  }
1103  }
1104  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1105  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1106  if (rowPtrsUnpacked_host.extent (0) == 0) {
1107  return static_cast<size_t> (0);
1108  }
1109  else {
1110  if(this->isLocallyIndexed())
1111  return lclIndsUnpacked_wdv.extent(0);
1112  else
1113  return gblInds_wdv.extent(0);
1114  }
1115  }
1116  else {
1117  return static_cast<size_t> (0);
1118  }
1119  }
1120  else {
1121  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1122  }
1123  }
1124 
1125  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126  Teuchos::RCP<const Teuchos::Comm<int> >
1128  getComm () const
1129  {
1130  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1131  }
1132 
1133  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1134  GlobalOrdinal
1137  {
1138  return rowMap_->getIndexBase ();
1139  }
1140 
1141  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1142  bool
1144  indicesAreAllocated () const
1145  {
1146  return indicesAreAllocated_;
1147  }
1148 
1149  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1150  bool
1152  isSorted () const
1153  {
1154  return indicesAreSorted_;
1155  }
1156 
1157  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1158  bool
1160  isMerged () const
1161  {
1162  return noRedundancies_;
1163  }
1164 
1165  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1166  void
1169  {
1170  // FIXME (mfh 07 May 2013) How do we know that the change
1171  // introduced a redundancy, or even that it invalidated the sorted
1172  // order of indices? CrsGraph has always made this conservative
1173  // guess. It could be a bit costly to check at insertion time,
1174  // though.
1175  indicesAreSorted_ = false;
1176  noRedundancies_ = false;
1177 
1178  // We've modified the graph, so we'll have to recompute local
1179  // constants like the number of diagonal entries on this process.
1180  haveLocalConstants_ = false;
1181  }
1182 
1183  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1184  void
1186  allocateIndices (const ELocalGlobal lg, const bool verbose)
1187  {
1189  using Teuchos::arcp;
1190  using Teuchos::Array;
1191  using Teuchos::ArrayRCP;
1192  using std::endl;
1193  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1194  typedef typename local_graph_device_type::row_map_type::non_const_type
1195  non_const_row_map_type;
1196  const char tfecfFuncName[] = "allocateIndices: ";
1197  const char suffix[] =
1198  " Please report this bug to the Tpetra developers.";
1199  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1200 
1201  std::unique_ptr<std::string> prefix;
1202  if (verbose) {
1203  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1204  std::ostringstream os;
1205  os << *prefix << "Start: lg="
1206  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1207  << ", numRows: " << this->getLocalNumRows() << endl;
1208  std::cerr << os.str();
1209  }
1210 
1211  // This is a protected function, only callable by us. If it was
1212  // called incorrectly, it is our fault. That's why the tests
1213  // below throw std::logic_error instead of std::invalid_argument.
1214  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1215  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1216  ": The graph is locally indexed, but Tpetra code is calling "
1217  "this method with lg=GlobalIndices." << suffix);
1218  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1219  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1220  ": The graph is globally indexed, but Tpetra code is calling "
1221  "this method with lg=LocalIndices." << suffix);
1222  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1223  (indicesAreAllocated (), std::logic_error, ": The graph's "
1224  "indices are already allocated, but Tpetra is calling "
1225  "allocateIndices again." << suffix);
1226  const size_t numRows = this->getLocalNumRows ();
1227 
1228  //
1229  // STATIC ALLOCATION PROFILE
1230  //
1231  size_type numInds = 0;
1232  {
1233  if (verbose) {
1234  std::ostringstream os;
1235  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1236  std::cerr << os.str();
1237  }
1238  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1239 
1240  if (this->k_numAllocPerRow_.extent (0) != 0) {
1241  // It's OK to throw std::invalid_argument here, because we
1242  // haven't incurred any side effects yet. Throwing that
1243  // exception (and not, say, std::logic_error) implies that the
1244  // instance can recover.
1245  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1246  (this->k_numAllocPerRow_.extent (0) != numRows,
1247  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1248  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1249  << ", but its length != numRows = " << numRows << ".");
1250 
1251  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1252  // we want to compute here) lives on device. That's OK;
1253  // computeOffsetsFromCounts can handle this case.
1255 
1256  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1257  // doesn't attempt to check its input for "invalid" flag
1258  // values. For now, we omit that feature of the sequential
1259  // code disabled below.
1260  numInds = computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1261  }
1262  else {
1263  // It's OK to throw std::invalid_argument here, because we
1264  // haven't incurred any side effects yet. Throwing that
1265  // exception (and not, say, std::logic_error) implies that the
1266  // instance can recover.
1267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1268  (this->numAllocForAllRows_ ==
1269  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1270  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1271  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1272  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1273 
1275  numInds = computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1276  }
1277  // "Commit" the resulting row offsets.
1278  setRowPtrsUnpacked(k_rowPtrs);
1279  }
1280  if(debug_) {
1281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1282  (numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1283  ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1284  "does not match final entry of rowptrs unpacked");
1285  }
1286 
1287 
1288  if (lg == LocalIndices) {
1289  if (verbose) {
1290  std::ostringstream os;
1291  os << *prefix << "Allocate local column indices "
1292  "lclIndsUnpacked_wdv: " << numInds << endl;
1293  std::cerr << os.str();
1294  }
1295  lclIndsUnpacked_wdv = local_inds_wdv_type (
1296  local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1297  }
1298  else {
1299  if (verbose) {
1300  std::ostringstream os;
1301  os << *prefix << "Allocate global column indices "
1302  "gblInds_wdv: " << numInds << endl;
1303  std::cerr << os.str();
1304  }
1305  gblInds_wdv = global_inds_wdv_type (
1306  global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1307  }
1308  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1309 
1310  this->indicesAreLocal_ = (lg == LocalIndices);
1311  this->indicesAreGlobal_ = (lg == GlobalIndices);
1312 
1313  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1314  using Kokkos::ViewAllocateWithoutInitializing;
1315  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1316  if (verbose) {
1317  std::ostringstream os;
1318  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1319  << endl;
1320  std::cerr << os.str();
1321  }
1322  num_row_entries_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1323  // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1324  Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1325  Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1326  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1327  }
1328 
1329  // Once indices are allocated, CrsGraph needs to free this information.
1330  this->numAllocForAllRows_ = 0;
1331  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1332  this->indicesAreAllocated_ = true;
1333 
1334  try {
1335  this->checkInternalState ();
1336  }
1337  catch (std::logic_error& e) {
1338  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1339  (true, std::logic_error, "At end of allocateIndices, "
1340  "checkInternalState threw std::logic_error: "
1341  << e.what ());
1342  }
1343  catch (std::exception& e) {
1344  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1345  (true, std::runtime_error, "At end of allocateIndices, "
1346  "checkInternalState threw std::exception: "
1347  << e.what ());
1348  }
1349  catch (...) {
1350  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1351  (true, std::runtime_error, "At end of allocateIndices, "
1352  "checkInternalState threw an exception "
1353  "not a subclass of std::exception.");
1354  }
1355 
1356  if (verbose) {
1357  std::ostringstream os;
1358  os << *prefix << "Done" << endl;
1359  std::cerr << os.str();
1360  }
1361  }
1362 
1363  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1364  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1365  local_inds_dualv_type::t_host::const_type
1367  getLocalIndsViewHost (const RowInfo& rowinfo) const
1368  {
1369  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1370  return typename local_inds_dualv_type::t_host::const_type ();
1371  else
1372  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1373  rowinfo.allocSize,
1374  Access::ReadOnly);
1375  }
1376 
1377  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1379  local_inds_dualv_type::t_host
1382  {
1383  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1384  return typename local_inds_dualv_type::t_host ();
1385  else
1386  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1387  rowinfo.allocSize,
1388  Access::ReadWrite);
1389  }
1390 
1391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1393  global_inds_dualv_type::t_host::const_type
1395  getGlobalIndsViewHost (const RowInfo& rowinfo) const
1396  {
1397  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1398  return typename global_inds_dualv_type::t_host::const_type ();
1399  else
1400  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1401  rowinfo.allocSize,
1402  Access::ReadOnly);
1403  }
1404 
1405  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1407  local_inds_dualv_type::t_dev::const_type
1409  getLocalIndsViewDevice (const RowInfo& rowinfo) const
1410  {
1411  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1412  return typename local_inds_dualv_type::t_dev::const_type ();
1413  else
1414  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1415  rowinfo.allocSize,
1416  Access::ReadOnly);
1417  }
1418 
1419  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1421  global_inds_dualv_type::t_dev::const_type
1423  getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1424  {
1425  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1426  return typename global_inds_dualv_type::t_dev::const_type ();
1427  else
1428  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1429  rowinfo.allocSize,
1430  Access::ReadOnly);
1431  }
1432 
1433 
1434  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1435  RowInfo
1437  getRowInfo (const LocalOrdinal myRow) const
1438  {
1439  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1440  RowInfo ret;
1441  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1442  ret.localRow = STINV;
1443  ret.allocSize = 0;
1444  ret.numEntries = 0;
1445  ret.offset1D = STINV;
1446  return ret;
1447  }
1448 
1449  ret.localRow = static_cast<size_t> (myRow);
1450  if (this->indicesAreAllocated ()) {
1451  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1452  // Offsets tell us the allocation size in this case.
1453  if (rowPtrsUnpacked_host.extent (0) == 0) {
1454  ret.offset1D = 0;
1455  ret.allocSize = 0;
1456  }
1457  else {
1458  ret.offset1D = rowPtrsUnpacked_host(myRow);
1459  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1460  }
1461 
1462  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1463  ret.allocSize :
1464  this->k_numRowEntries_(myRow);
1465  }
1466  else { // haven't performed allocation yet; probably won't hit this code
1467  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1468  // allocate, rather than doing lazy allocation at first insert.
1469  // This will make k_numAllocPerRow_ obsolete.
1470  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1471  this->k_numAllocPerRow_(myRow) : // this is a host View
1472  this->numAllocForAllRows_;
1473  ret.numEntries = 0;
1474  ret.offset1D = STINV;
1475  }
1476 
1477  return ret;
1478  }
1479 
1480 
1481  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1482  RowInfo
1484  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1485  {
1486  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1487  RowInfo ret;
1488  if (this->rowMap_.is_null ()) {
1489  ret.localRow = STINV;
1490  ret.allocSize = 0;
1491  ret.numEntries = 0;
1492  ret.offset1D = STINV;
1493  return ret;
1494  }
1495  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1496  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1497  ret.localRow = STINV;
1498  ret.allocSize = 0;
1499  ret.numEntries = 0;
1500  ret.offset1D = STINV;
1501  return ret;
1502  }
1503 
1504  ret.localRow = static_cast<size_t> (myRow);
1505  if (this->indicesAreAllocated ()) {
1506  // graph data structures have the info that we need
1507  //
1508  // if static graph, offsets tell us the allocation size
1509  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1510  if (rowPtrsUnpacked_host.extent (0) == 0) {
1511  ret.offset1D = 0;
1512  ret.allocSize = 0;
1513  }
1514  else {
1515  ret.offset1D = rowPtrsUnpacked_host(myRow);
1516  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1517  }
1518 
1519  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1520  ret.allocSize :
1521  this->k_numRowEntries_(myRow);
1522  }
1523  else { // haven't performed allocation yet; probably won't hit this code
1524  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1525  // allocate, rather than doing lazy allocation at first insert.
1526  // This will make k_numAllocPerRow_ obsolete.
1527  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1528  this->k_numAllocPerRow_(myRow) : // this is a host View
1529  this->numAllocForAllRows_;
1530  ret.numEntries = 0;
1531  ret.offset1D = STINV;
1532  }
1533 
1534  return ret;
1535  }
1536 
1537 
1538  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1539  void
1541  staticAssertions () const
1542  {
1543  using Teuchos::OrdinalTraits;
1544  typedef LocalOrdinal LO;
1545  typedef GlobalOrdinal GO;
1546  typedef global_size_t GST;
1547 
1548  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1549  // This is so that we can store local indices in the memory
1550  // formerly occupied by global indices.
1551  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1552  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1553  // Assumption: max(size_t) >= max(LocalOrdinal)
1554  // This is so that we can represent any LocalOrdinal as a size_t.
1555  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1556  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1557  static_assert (sizeof(GST) >= sizeof(size_t),
1558  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1559 
1560  // FIXME (mfh 30 Sep 2015) We're not using
1561  // Teuchos::CompileTimeAssert any more. Can we do these checks
1562  // with static_assert?
1563 
1564  // can't call max() with CompileTimeAssert, because it isn't a
1565  // constant expression; will need to make this a runtime check
1566  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1567  "given template arguments: size assumptions are not valid.";
1568  TEUCHOS_TEST_FOR_EXCEPTION(
1569  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1570  std::runtime_error, msg);
1571  TEUCHOS_TEST_FOR_EXCEPTION(
1572  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1573  std::runtime_error, msg);
1574  TEUCHOS_TEST_FOR_EXCEPTION(
1575  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1576  std::runtime_error, msg);
1577  TEUCHOS_TEST_FOR_EXCEPTION(
1578  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1579  std::runtime_error, msg);
1580  }
1581 
1582 
1583  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1584  size_t
1587  const SLocalGlobalViews &newInds,
1588  const ELocalGlobal lg,
1589  const ELocalGlobal I)
1590  {
1591  using Teuchos::ArrayView;
1592  typedef LocalOrdinal LO;
1593  typedef GlobalOrdinal GO;
1594  const char tfecfFuncName[] = "insertIndices: ";
1595 
1596  size_t oldNumEnt = 0;
1597  if (debug_) {
1598  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1599  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1600  "lg must be either GlobalIndices or LocalIndices.");
1601  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1602  }
1603 
1604  size_t numNewInds = 0;
1605  if (lg == GlobalIndices) { // input indices are global
1606  ArrayView<const GO> new_ginds = newInds.ginds;
1607  numNewInds = new_ginds.size();
1608  if (I == GlobalIndices) { // store global indices
1609  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1610  if (debug_) {
1611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1612  (static_cast<size_t> (gind_view.size ()) <
1613  rowinfo.numEntries + numNewInds, std::logic_error,
1614  "gind_view.size() = " << gind_view.size ()
1615  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1616  << ") + numNewInds (= " << numNewInds << ").");
1617  }
1618  GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1619  + rowinfo.numEntries;
1620  for (size_t k = 0; k < numNewInds; ++k) {
1621  gblColInds_out[k] = new_ginds[k];
1622  }
1623  }
1624  else if (I == LocalIndices) { // store local indices
1625  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1626  if (debug_) {
1627  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1628  (static_cast<size_t> (lind_view.size ()) <
1629  rowinfo.numEntries + numNewInds, std::logic_error,
1630  "lind_view.size() = " << lind_view.size ()
1631  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1632  << ") + numNewInds (= " << numNewInds << ").");
1633  }
1634  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1635  + rowinfo.numEntries;
1636  for (size_t k = 0; k < numNewInds; ++k) {
1637  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1638  }
1639  }
1640  }
1641  else if (lg == LocalIndices) { // input indices are local
1642  ArrayView<const LO> new_linds = newInds.linds;
1643  numNewInds = new_linds.size();
1644  if (I == LocalIndices) { // store local indices
1645  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1646  if (debug_) {
1647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1648  (static_cast<size_t> (lind_view.size ()) <
1649  rowinfo.numEntries + numNewInds, std::logic_error,
1650  "lind_view.size() = " << lind_view.size ()
1651  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1652  << ") + numNewInds (= " << numNewInds << ").");
1653  }
1654  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1655  + rowinfo.numEntries;
1656  for (size_t k = 0; k < numNewInds; ++k) {
1657  lclColInds_out[k] = new_linds[k];
1658  }
1659  }
1660  else if (I == GlobalIndices) {
1661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1662  (true, std::logic_error, "The case where the input indices are local "
1663  "and the indices to write are global (lg=LocalIndices, I="
1664  "GlobalIndices) is not implemented, because it does not make sense."
1665  << std::endl << "If you have correct local column indices, that "
1666  "means the graph has a column Map. In that case, you should be "
1667  "storing local indices.");
1668  }
1669  }
1670 
1671  rowinfo.numEntries += numNewInds;
1672  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1673  this->setLocallyModified ();
1674 
1675  if (debug_) {
1676  const size_t chkNewNumEnt =
1677  this->getNumEntriesInLocalRow (rowinfo.localRow);
1678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1679  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1680  "chkNewNumEnt = " << chkNewNumEnt
1681  << " != oldNumEnt (= " << oldNumEnt
1682  << ") + numNewInds (= " << numNewInds << ").");
1683  }
1684 
1685  return numNewInds;
1686  }
1687 
1688  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1689  size_t
1691  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1692  const GlobalOrdinal inputGblColInds[],
1693  const size_t numInputInds)
1694  {
1695  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1696  inputGblColInds, numInputInds);
1697  }
1698 
1699  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1700  size_t
1703  const GlobalOrdinal inputGblColInds[],
1704  const size_t numInputInds,
1705  std::function<void(const size_t, const size_t, const size_t)> fun)
1706  {
1708  using Kokkos::View;
1709  using Kokkos::subview;
1710  using Kokkos::MemoryUnmanaged;
1711  using Teuchos::ArrayView;
1712  using LO = LocalOrdinal;
1713  using GO = GlobalOrdinal;
1714  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1715  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1716 
1717  auto numEntries = rowInfo.numEntries;
1718  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1719  inp_view_type inputInds(inputGblColInds, numInputInds);
1720  size_t numInserted;
1721  {
1722  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1723  numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1724  gblIndsHostView,
1725  numEntries, inputInds, fun);
1726  }
1727 
1728  const bool insertFailed =
1729  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1730  if(insertFailed) {
1731  constexpr size_t ONE (1);
1732  const int myRank = this->getComm()->getRank();
1733  std::ostringstream os;
1734 
1735  os << "Proc " << myRank << ": Not enough capacity to insert "
1736  << numInputInds
1737  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1738  << " into local row " << lclRow << ", which currently has "
1739  << rowInfo.numEntries
1740  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1741  << " and total allocation size " << rowInfo.allocSize
1742  << ". ";
1743  const size_t maxNumToPrint =
1745  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1746  numInputInds);
1747  verbosePrintArray(os, inputGblColIndsView, "Input global "
1748  "column indices", maxNumToPrint);
1749  os << ", ";
1750  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1751  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1752  rowInfo.numEntries);
1753  verbosePrintArray(os, curGblColIndsView, "Current global "
1754  "column indices", maxNumToPrint);
1755  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1756  (true, std::runtime_error, os.str());
1757  }
1758 
1759  this->k_numRowEntries_(lclRow) += numInserted;
1760 
1761  this->setLocallyModified();
1762  return numInserted;
1763  }
1764 
1765 
1766  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1767  void
1769  insertLocalIndicesImpl (const LocalOrdinal myRow,
1770  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1771  std::function<void(const size_t, const size_t, const size_t)> fun)
1772  {
1773  using Kokkos::MemoryUnmanaged;
1774  using Kokkos::subview;
1775  using Kokkos::View;
1776  using LO = LocalOrdinal;
1777  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1778 
1779  const RowInfo rowInfo = this->getRowInfo(myRow);
1780 
1781  size_t numNewInds = 0;
1782  size_t newNumEntries = 0;
1783 
1784  auto numEntries = rowInfo.numEntries;
1785  // Note: Teuchos::ArrayViews are in HostSpace
1786  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1787  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1788  size_t numInserted = 0;
1789  {
1790  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1791  numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1792  numEntries, inputInds, fun);
1793  }
1794 
1795  const bool insertFailed =
1796  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1797  if(insertFailed) {
1798  constexpr size_t ONE (1);
1799  const size_t numInputInds(indices.size());
1800  const int myRank = this->getComm()->getRank();
1801  std::ostringstream os;
1802  os << "On MPI Process " << myRank << ": Not enough capacity to "
1803  "insert " << numInputInds
1804  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1805  << " into local row " << myRow << ", which currently has "
1806  << rowInfo.numEntries
1807  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1808  << " and total allocation size " << rowInfo.allocSize << ".";
1809  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1810  (true, std::runtime_error, os.str());
1811  }
1812  numNewInds = numInserted;
1813  newNumEntries = rowInfo.numEntries + numNewInds;
1814 
1815  this->k_numRowEntries_(myRow) += numNewInds;
1816  this->setLocallyModified ();
1817 
1818  if (debug_) {
1819  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1820  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1821  (chkNewNumEntries != newNumEntries, std::logic_error,
1822  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1823  << " != newNumEntries = " << newNumEntries
1824  << ". Please report this bug to the Tpetra developers.");
1825  }
1826  }
1827 
1828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1829  size_t
1832  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1833  std::function<void(const size_t, const size_t, const size_t)> fun) const
1834  {
1835  using GO = GlobalOrdinal;
1836  using Kokkos::View;
1837  using Kokkos::MemoryUnmanaged;
1838  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1839 
1840  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1841  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1842 
1843  size_t numFound = 0;
1844  LocalOrdinal lclRow = rowInfo.localRow;
1845  if (this->isLocallyIndexed())
1846  {
1847  if (this->colMap_.is_null())
1848  return invalidCount;
1849  const auto& colMap = *(this->colMap_);
1850  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1851  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1852  rowInfo.numEntries,
1853  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1854  }
1855  else if (this->isGloballyIndexed())
1856  {
1857  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1858  rowInfo.numEntries,
1859  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1860  }
1861  return numFound;
1862  }
1863 
1864 
1865  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1866  size_t
1869  const bool sorted,
1870  const bool merged)
1871  {
1872  const size_t origNumEnt = rowInfo.numEntries;
1873  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1874  origNumEnt != 0) {
1875  auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1876 
1877  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1878  if (! sorted) {
1879  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1880  }
1881 
1882  if (! merged) {
1883  LocalOrdinal* const beg = lclColIndsRaw;
1884  LocalOrdinal* const end = beg + rowInfo.numEntries;
1885  LocalOrdinal* const newend = std::unique (beg, end);
1886  const size_t newNumEnt = newend - beg;
1887 
1888  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1889  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1890  return origNumEnt - newNumEnt; // the number of duplicates in the row
1891  }
1892  else {
1893  return static_cast<size_t> (0); // assume no duplicates
1894  }
1895  }
1896  else {
1897  return static_cast<size_t> (0); // no entries in the row
1898  }
1899  }
1900 
1901 
1902  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1903  void
1905  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1906  const Teuchos::RCP<const map_type>& rangeMap)
1907  {
1908  // simple pointer comparison for equality
1909  if (domainMap_ != domainMap) {
1910  domainMap_ = domainMap;
1911  importer_ = Teuchos::null;
1912  }
1913  if (rangeMap_ != rangeMap) {
1914  rangeMap_ = rangeMap;
1915  exporter_ = Teuchos::null;
1916  }
1917  }
1918 
1919 
1920  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1921  void
1924  {
1925  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1926 
1927  globalNumEntries_ = INV;
1928  globalMaxNumRowEntries_ = INV;
1929  haveGlobalConstants_ = false;
1930  }
1931 
1932 
1933  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1934  void
1937  {
1938  if (debug_) {
1939  using std::endl;
1940  const char tfecfFuncName[] = "checkInternalState: ";
1941  const char suffix[] = " Please report this bug to the Tpetra developers.";
1942 
1943  std::unique_ptr<std::string> prefix;
1944  if (verbose_) {
1945  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1946  std::ostringstream os;
1947  os << *prefix << "Start" << endl;
1948  std::cerr << os.str();
1949  }
1950 
1951  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1952  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1953  // check the internal state of this data structure
1954  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1955  // always remains in a valid state
1956 
1957  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958  (this->rowMap_.is_null (), std::logic_error,
1959  "Row Map is null." << suffix);
1960  // This may access the row Map, so we need to check first (above)
1961  // whether the row Map is null.
1962  const LocalOrdinal lclNumRows =
1963  static_cast<LocalOrdinal> (this->getLocalNumRows ());
1964 
1965  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1966  (this->isFillActive () == this->isFillComplete (), std::logic_error,
1967  "Graph cannot be both fill active and fill complete." << suffix);
1968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969  (this->isFillComplete () &&
1970  (this->colMap_.is_null () ||
1971  this->rangeMap_.is_null () ||
1972  this->domainMap_.is_null ()),
1973  std::logic_error,
1974  "Graph is full complete, but at least one of {column, range, domain} "
1975  "Map is null." << suffix);
1976  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1977  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1978  std::logic_error, "Storage is optimized, but indices are not "
1979  "allocated, not even trivially." << suffix);
1980 
1981  size_t nodeAllocSize = 0;
1982  try {
1983  nodeAllocSize = this->getLocalAllocationSize ();
1984  }
1985  catch (std::logic_error& e) {
1986  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1987  (true, std::runtime_error, "getLocalAllocationSize threw "
1988  "std::logic_error: " << e.what ());
1989  }
1990  catch (std::exception& e) {
1991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992  (true, std::runtime_error, "getLocalAllocationSize threw an "
1993  "std::exception: " << e.what ());
1994  }
1995  catch (...) {
1996  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1997  (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1998  "not a subclass of std::exception.");
1999  }
2000 
2001  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2002  (this->isStorageOptimized () &&
2003  nodeAllocSize != this->getLocalNumEntries (),
2004  std::logic_error, "Storage is optimized, but "
2005  "this->getLocalAllocationSize() = " << nodeAllocSize
2006  << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
2007  << "." << suffix);
2008  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2009  (! this->haveGlobalConstants_ &&
2010  (this->globalNumEntries_ != GSTI ||
2011  this->globalMaxNumRowEntries_ != GSTI),
2012  std::logic_error, "Graph claims not to have global constants, but "
2013  "some of the global constants are not marked as invalid." << suffix);
2014  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2015  (this->haveGlobalConstants_ &&
2016  (this->globalNumEntries_ == GSTI ||
2017  this->globalMaxNumRowEntries_ == GSTI),
2018  std::logic_error, "Graph claims to have global constants, but "
2019  "some of them are marked as invalid." << suffix);
2020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2021  (this->haveGlobalConstants_ &&
2022  (this->globalNumEntries_ < this->getLocalNumEntries () ||
2023  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2024  std::logic_error, "Graph claims to have global constants, and "
2025  "all of the values of the global constants are valid, but "
2026  "some of the local constants are greater than "
2027  "their corresponding global constants." << suffix);
2028  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2029  (this->indicesAreAllocated () &&
2030  (this->numAllocForAllRows_ != 0 ||
2031  this->k_numAllocPerRow_.extent (0) != 0),
2032  std::logic_error, "The graph claims that its indices are allocated, but "
2033  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2034  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2035  "the graph is supposed to release its \"allocation specifications\" "
2036  "when it allocates its indices." << suffix);
2037  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
2038  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
2039  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2040  (rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
2041  std::logic_error, "The host and device views of k_rowPtrs_ have "
2042  "different sizes; rowPtrsUnpacked_host_ has size "
2043  << rowPtrsUnpacked_host.extent(0)
2044  << ", but rowPtrsUnpacked_dev_ has size "
2045  << rowPtrsUnpacked_dev.extent(0)
2046  << "." << suffix);
2047  if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
2048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2049  (size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
2050  std::logic_error, "The graph is globally indexed and "
2051  "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host.extent(0)
2052  << ", but that size does not equal lclNumRows+1 = "
2053  << (lclNumRows+1) << "." << suffix);
2054  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055  (rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2056  std::logic_error, "The graph is globally indexed and "
2057  "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host.extent(0)
2058  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2059  << rowPtrsUnpacked_host(lclNumRows)
2060  << " != gblInds_wdv.extent(0)="
2061  << gblInds_wdv.extent(0) << "." << suffix);
2062  }
2063  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2064  (this->isLocallyIndexed () &&
2065  rowPtrsUnpacked_host.extent (0) != 0 &&
2066  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2067  static_cast<size_t> (lclNumRows + 1) ||
2068  rowPtrsUnpacked_host(lclNumRows) !=
2069  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2070  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2071  "the graph is locally indexed, then "
2072  "k_rowPtrs_ must have N+1 rows, and "
2073  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2074 
2075  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076  (this->indicesAreAllocated () &&
2077  nodeAllocSize > 0 &&
2078  this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2079  this->gblInds_wdv.extent (0) == 0,
2080  std::logic_error, "Graph is allocated nontrivially, but "
2081  "but 1-D allocations are not present." << suffix);
2082 
2083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2084  (! this->indicesAreAllocated () &&
2085  ((rowPtrsUnpacked_host.extent (0) != 0 ||
2086  this->k_numRowEntries_.extent (0) != 0) ||
2087  this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2088  this->gblInds_wdv.extent (0) != 0),
2089  std::logic_error, "If indices are not allocated, "
2090  "then none of the buffers should be." << suffix);
2091  // indices may be local or global only if they are allocated
2092  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2093  // indicesAreGlobal_)
2094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2096  ! this->indicesAreAllocated_,
2097  std::logic_error, "Indices may be local or global only if they are "
2098  "allocated." << suffix);
2099  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2100  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2101  std::logic_error, "Indices may not be both local and global." << suffix);
2102  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2103  (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2104  std::logic_error, "Indices are local, but "
2105  "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2106  << ") != 0. In other words, if indices are local, then "
2107  "allocations of global indices should not be present."
2108  << suffix);
2109  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2110  (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2111  std::logic_error, "Indices are global, but "
2112  "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2113  << ") != 0. In other words, if indices are global, "
2114  "then allocations for local indices should not be present."
2115  << suffix);
2116  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117  (indicesAreLocal_ && nodeAllocSize > 0 &&
2118  lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2119  std::logic_error, "Indices are local and "
2120  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2121  "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2122  << getLocalNumRows () << " > 0." << suffix);
2123  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2124  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2125  gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2126  std::logic_error, "Indices are global and "
2127  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2128  "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2129  << getLocalNumRows () << " > 0." << suffix);
2130  // check the actual allocations
2131  if (this->indicesAreAllocated () &&
2132  rowPtrsUnpacked_host.extent (0) != 0) {
2133  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2134  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2135  this->getLocalNumRows () + 1,
2136  std::logic_error, "Indices are allocated and "
2137  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2138  << rowPtrsUnpacked_host.extent (0) << " != getLocalNumRows()+1 = "
2139  << (this->getLocalNumRows () + 1) << "." << suffix);
2140  const size_t actualNumAllocated =
2141  rowPtrsUnpacked_host(this->getLocalNumRows());
2142  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2143  (this->isLocallyIndexed () &&
2144  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2145  std::logic_error, "Graph is locally indexed, indices are "
2146  "are allocated, and k_rowPtrs_ has nonzero length, but "
2147  "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2148  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2149  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2150  (this->isGloballyIndexed () &&
2151  static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2152  std::logic_error, "Graph is globally indexed, indices "
2153  "are allocated, and k_rowPtrs_ has nonzero length, but "
2154  "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2155  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2156  }
2157 
2158  if (verbose_) {
2159  std::ostringstream os;
2160  os << *prefix << "Done" << endl;
2161  std::cerr << os.str();
2162  }
2163  }
2164  }
2165 
2166 
2167  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2168  size_t
2170  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2171  {
2172  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2173  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2174  return Teuchos::OrdinalTraits<size_t>::invalid ();
2175  }
2176  else {
2177  return rowInfo.numEntries;
2178  }
2179  }
2180 
2181 
2182  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2183  size_t
2185  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2186  {
2187  const RowInfo rowInfo = this->getRowInfo (localRow);
2188  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2189  return Teuchos::OrdinalTraits<size_t>::invalid ();
2190  }
2191  else {
2192  return rowInfo.numEntries;
2193  }
2194  }
2195 
2196 
2197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2198  size_t
2200  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2201  {
2202  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2203  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2204  return Teuchos::OrdinalTraits<size_t>::invalid ();
2205  }
2206  else {
2207  return rowInfo.allocSize;
2208  }
2209  }
2210 
2211 
2212  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2213  size_t
2215  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2216  {
2217  const RowInfo rowInfo = this->getRowInfo (localRow);
2218  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2219  return Teuchos::OrdinalTraits<size_t>::invalid ();
2220  }
2221  else {
2222  return rowInfo.allocSize;
2223  }
2224  }
2225 
2226 
2227  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2228  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2231  {
2232  return getRowPtrsPackedHost();
2233  }
2234 
2235  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2236  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2239  {
2240  return getRowPtrsPackedDevice();
2241  }
2242 
2243 
2244  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2245  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2248  {
2249  return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2250  }
2251 
2252  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2256  {
2257  return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2258  }
2259 
2260  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2261  void
2263  getLocalRowCopy (LocalOrdinal localRow,
2264  nonconst_local_inds_host_view_type & indices,
2265  size_t& numEntries) const
2266  {
2267  using Teuchos::ArrayView;
2268  const char tfecfFuncName[] = "getLocalRowCopy: ";
2269 
2270  TEUCHOS_TEST_FOR_EXCEPTION(
2271  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2272  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2273  "does not have a column Map yet. That means we don't have local indices "
2274  "for columns yet, so it doesn't make sense to call this method. If the "
2275  "graph doesn't have a column Map yet, you should call fillComplete on "
2276  "it first.");
2277 
2278  // This does the right thing (reports an empty row) if the input
2279  // row is invalid.
2280  const RowInfo rowinfo = this->getRowInfo (localRow);
2281  // No side effects on error.
2282  const size_t theNumEntries = rowinfo.numEntries;
2283  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2284  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2285  "Specified storage (size==" << indices.size () << ") does not suffice "
2286  "to hold all " << theNumEntries << " entry/ies for this row.");
2287  numEntries = theNumEntries;
2288 
2289  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2290  if (isLocallyIndexed ()) {
2291  auto lclInds = getLocalIndsViewHost(rowinfo);
2292  for (size_t j = 0; j < theNumEntries; ++j) {
2293  indices[j] = lclInds(j);
2294  }
2295  }
2296  else if (isGloballyIndexed ()) {
2297  auto gblInds = getGlobalIndsViewHost(rowinfo);
2298  for (size_t j = 0; j < theNumEntries; ++j) {
2299  indices[j] = colMap_->getLocalElement (gblInds(j));
2300  }
2301  }
2302  }
2303  }
2304 
2305 
2306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2307  void
2309  getGlobalRowCopy (GlobalOrdinal globalRow,
2310  nonconst_global_inds_host_view_type &indices,
2311  size_t& numEntries) const
2312  {
2313  using Teuchos::ArrayView;
2314  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2315 
2316  // This does the right thing (reports an empty row) if the input
2317  // row is invalid.
2318  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2319  const size_t theNumEntries = rowinfo.numEntries;
2320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2321  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2322  "Specified storage (size==" << indices.size () << ") does not suffice "
2323  "to hold all " << theNumEntries << " entry/ies for this row.");
2324  numEntries = theNumEntries; // first side effect
2325 
2326  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2327  if (isLocallyIndexed ()) {
2328  auto lclInds = getLocalIndsViewHost(rowinfo);
2329  for (size_t j = 0; j < theNumEntries; ++j) {
2330  indices[j] = colMap_->getGlobalElement (lclInds(j));
2331  }
2332  }
2333  else if (isGloballyIndexed ()) {
2334  auto gblInds = getGlobalIndsViewHost(rowinfo);
2335  for (size_t j = 0; j < theNumEntries; ++j) {
2336  indices[j] = gblInds(j);
2337  }
2338  }
2339  }
2340  }
2341 
2342 
2343  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2344  void
2347  const LocalOrdinal localRow,
2348  local_inds_host_view_type &indices) const
2349  {
2350  const char tfecfFuncName[] = "getLocalRowView: ";
2351 
2352  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2353  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2354  "currently stored as global indices, so we cannot return a view with "
2355  "local column indices, whether or not the graph has a column Map. If "
2356  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2357 
2358  const RowInfo rowInfo = getRowInfo (localRow);
2359  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2360  rowInfo.numEntries > 0) {
2361  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2362  rowInfo.numEntries,
2363  Access::ReadOnly);
2364  }
2365  else {
2366  // This does the right thing (reports an empty row) if the input
2367  // row is invalid.
2368  indices = local_inds_host_view_type();
2369  }
2370 
2371  if (debug_) {
2372  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2373  (static_cast<size_t> (indices.size ()) !=
2374  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2375  "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2376  localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2377  ". Please report this bug to the Tpetra developers.");
2378  }
2379  }
2380 
2381 
2382  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2383  void
2386  const GlobalOrdinal globalRow,
2387  global_inds_host_view_type &indices) const
2388  {
2389  const char tfecfFuncName[] = "getGlobalRowView: ";
2390 
2391  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2392  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2393  "currently stored as local indices, so we cannot return a view with "
2394  "global column indices. Use getGlobalRowCopy() instead.");
2395 
2396  // This does the right thing (reports an empty row) if the input
2397  // row is invalid.
2398  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2399  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2400  rowInfo.numEntries > 0) {
2401  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2402  rowInfo.numEntries,
2403  Access::ReadOnly);
2404  }
2405  else {
2406  indices = typename global_inds_dualv_type::t_host::const_type();
2407  }
2408  if (debug_) {
2409  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2410  (static_cast<size_t> (indices.size ()) !=
2411  getNumEntriesInGlobalRow (globalRow),
2412  std::logic_error, "indices.size() = " << indices.extent(0)
2413  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2414  << getNumEntriesInGlobalRow (globalRow)
2415  << ". Please report this bug to the Tpetra developers.");
2416  }
2417  }
2418 
2419 
2420  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2421  void
2423  insertLocalIndices (const LocalOrdinal localRow,
2424  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2425  {
2426  const char tfecfFuncName[] = "insertLocalIndices: ";
2427 
2428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2429  (! isFillActive (), std::runtime_error, "Fill must be active.");
2430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2431  (isGloballyIndexed (), std::runtime_error,
2432  "Graph indices are global; use insertGlobalIndices().");
2433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2434  (! hasColMap (), std::runtime_error,
2435  "Cannot insert local indices without a column Map.");
2436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2437  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2438  "Local row index " << localRow << " is not in the row Map "
2439  "on the calling process.");
2440  if (! indicesAreAllocated ()) {
2441  allocateIndices (LocalIndices, verbose_);
2442  }
2443 
2444  if (debug_) {
2445  // In debug mode, if the graph has a column Map, test whether any
2446  // of the given column indices are not in the column Map. Keep
2447  // track of the invalid column indices so we can tell the user
2448  // about them.
2449  if (hasColMap ()) {
2450  using Teuchos::Array;
2451  using Teuchos::toString;
2452  using std::endl;
2453  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2454 
2455  const map_type& colMap = *colMap_;
2456  Array<LocalOrdinal> badColInds;
2457  bool allInColMap = true;
2458  for (size_type k = 0; k < indices.size (); ++k) {
2459  if (! colMap.isNodeLocalElement (indices[k])) {
2460  allInColMap = false;
2461  badColInds.push_back (indices[k]);
2462  }
2463  }
2464  if (! allInColMap) {
2465  std::ostringstream os;
2466  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2467  "entries in owned row " << localRow << ", at the following column "
2468  "indices: " << toString (indices) << "." << endl;
2469  os << "Of those, the following indices are not in the column Map on "
2470  "this process: " << toString (badColInds) << "." << endl << "Since "
2471  "the graph has a column Map already, it is invalid to insert entries "
2472  "at those locations.";
2473  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2474  }
2475  }
2476  }
2477 
2478  insertLocalIndicesImpl (localRow, indices);
2479 
2480  if (debug_) {
2481  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2482  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2483  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2484  "! isLocallyIndexed() is true. Please report this bug to the "
2485  "Tpetra developers.");
2486  }
2487  }
2488 
2489  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2490  void
2492  insertLocalIndices (const LocalOrdinal localRow,
2493  const LocalOrdinal numEnt,
2494  const LocalOrdinal inds[])
2495  {
2496  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2497  this->insertLocalIndices (localRow, indsT);
2498  }
2499 
2500 
2501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2502  void
2504  insertGlobalIndices (const GlobalOrdinal gblRow,
2505  const LocalOrdinal numInputInds,
2506  const GlobalOrdinal inputGblColInds[])
2507  {
2508  typedef LocalOrdinal LO;
2509  const char tfecfFuncName[] = "insertGlobalIndices: ";
2510 
2511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2512  (this->isLocallyIndexed (), std::runtime_error,
2513  "graph indices are local; use insertLocalIndices().");
2514  // This can't really be satisfied for now, because if we are
2515  // fillComplete(), then we are local. In the future, this may
2516  // change. However, the rule that modification require active
2517  // fill will not change.
2518  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2519  (! this->isFillActive (), std::runtime_error,
2520  "You are not allowed to call this method if fill is not active. "
2521  "If fillComplete has been called, you must first call resumeFill "
2522  "before you may insert indices.");
2523  if (! indicesAreAllocated ()) {
2524  allocateIndices (GlobalIndices, verbose_);
2525  }
2526  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2527  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2528  if (debug_) {
2529  if (this->hasColMap ()) {
2530  using std::endl;
2531  const map_type& colMap = * (this->colMap_);
2532  // In a debug build, keep track of the nonowned ("bad") column
2533  // indices, so that we can display them in the exception
2534  // message. In a release build, just ditch the loop early if
2535  // we encounter a nonowned column index.
2536  std::vector<GlobalOrdinal> badColInds;
2537  bool allInColMap = true;
2538  for (LO k = 0; k < numInputInds; ++k) {
2539  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2540  allInColMap = false;
2541  badColInds.push_back (inputGblColInds[k]);
2542  }
2543  }
2544  if (! allInColMap) {
2545  std::ostringstream os;
2546  os << "You attempted to insert entries in owned row " << gblRow
2547  << ", at the following column indices: [";
2548  for (LO k = 0; k < numInputInds; ++k) {
2549  os << inputGblColInds[k];
2550  if (k + static_cast<LO> (1) < numInputInds) {
2551  os << ",";
2552  }
2553  }
2554  os << "]." << endl << "Of those, the following indices are not in "
2555  "the column Map on this process: [";
2556  for (size_t k = 0; k < badColInds.size (); ++k) {
2557  os << badColInds[k];
2558  if (k + size_t (1) < badColInds.size ()) {
2559  os << ",";
2560  }
2561  }
2562  os << "]." << endl << "Since the matrix has a column Map already, "
2563  "it is invalid to insert entries at those locations.";
2564  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2565  (true, std::invalid_argument, os.str ());
2566  }
2567  }
2568  } // debug_
2569  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2570  }
2571  else { // a nonlocal row
2572  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2573  numInputInds);
2574  }
2575  }
2576 
2577 
2578  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2579  void
2581  insertGlobalIndices (const GlobalOrdinal gblRow,
2582  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2583  {
2584  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2585  inputGblColInds.getRawPtr ());
2586  }
2587 
2588 
2589  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2590  void
2592  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2593  const GlobalOrdinal gblColInds[],
2594  const LocalOrdinal numGblColInds)
2595  {
2596  typedef LocalOrdinal LO;
2597  typedef GlobalOrdinal GO;
2598  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2599 
2600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2601  (this->isLocallyIndexed (), std::runtime_error,
2602  "Graph indices are local; use insertLocalIndices().");
2603  // This can't really be satisfied for now, because if we are
2604  // fillComplete(), then we are local. In the future, this may
2605  // change. However, the rule that modification require active
2606  // fill will not change.
2607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2608  (! this->isFillActive (), std::runtime_error,
2609  "You are not allowed to call this method if fill is not active. "
2610  "If fillComplete has been called, you must first call resumeFill "
2611  "before you may insert indices.");
2612  if (! indicesAreAllocated ()) {
2613  allocateIndices (GlobalIndices, verbose_);
2614  }
2615 
2616  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2617  // If we have a column Map, use it to filter the entries.
2618  if (! colMap_.is_null ()) {
2619  const map_type& colMap = * (this->colMap_);
2620 
2621  LO curOffset = 0;
2622  while (curOffset < numGblColInds) {
2623  // Find a sequence of input indices that are in the column Map
2624  // on the calling process. Doing a sequence at a time,
2625  // instead of one at a time, amortizes some overhead.
2626  LO endOffset = curOffset;
2627  for ( ; endOffset < numGblColInds; ++endOffset) {
2628  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2629  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2630  break; // first entry, in current sequence, not in the column Map
2631  }
2632  }
2633  // curOffset, endOffset: half-exclusive range of indices in
2634  // the column Map on the calling process. If endOffset ==
2635  // curOffset, the range is empty.
2636  const LO numIndInSeq = (endOffset - curOffset);
2637  if (numIndInSeq != 0) {
2638  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2639  numIndInSeq);
2640  }
2641  // Invariant before this line: Either endOffset ==
2642  // numGblColInds, or gblColInds[endOffset] is not in the
2643  // column Map on the calling process.
2644  curOffset = endOffset + 1;
2645  }
2646  }
2647  else {
2648  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2649  gblColInds_av.size ());
2650  }
2651  }
2652 
2653  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2654  void
2656  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2657  const GlobalOrdinal gblColInds[],
2658  const LocalOrdinal numGblColInds)
2659  {
2660  // This creates the std::vector if it doesn't exist yet.
2661  // std::map's operator[] does a lookup each time, so it's better
2662  // to pull nonlocals_[grow] out of the loop.
2663  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2664  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2665  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2666  // order to avoid duplicates. globalAssemble() sorts these
2667  // anyway.
2668  nonlocalRow.push_back (gblColInds[k]);
2669  }
2670  }
2671 
2672  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2673  void
2675  removeLocalIndices (LocalOrdinal lrow)
2676  {
2677  const char tfecfFuncName[] = "removeLocalIndices: ";
2678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2679  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2680  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2681  isStorageOptimized (), std::runtime_error,
2682  "cannot remove indices after optimizeStorage() has been called.");
2683  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2684  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2686  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2687  "Local row " << lrow << " is not in the row Map on the calling process.");
2688  if (! indicesAreAllocated ()) {
2689  allocateIndices (LocalIndices, verbose_);
2690  }
2691 
2692  if (k_numRowEntries_.extent (0) != 0) {
2693  this->k_numRowEntries_(lrow) = 0;
2694  }
2695 
2696  if (debug_) {
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2698  (getNumEntriesInLocalRow (lrow) != 0 ||
2699  ! indicesAreAllocated () ||
2700  ! isLocallyIndexed (), std::logic_error,
2701  "Violated stated post-conditions. Please contact Tpetra team.");
2702  }
2703  }
2704 
2705 
2706  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2707  void
2709  setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2710  const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2711  {
2712  using ProfilingRegion=Details::ProfilingRegion;
2713  ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2714  const char tfecfFuncName[] = "setAllIndices: ";
2715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2716  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2717  "The graph must have a column Map before you may call this method.");
2718  LocalOrdinal numLocalRows = this->getLocalNumRows ();
2719  {
2720  LocalOrdinal rowPtrLen = rowPointers.size();
2721  if(numLocalRows == 0) {
2722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2723  rowPtrLen != 0 && rowPtrLen != 1,
2724  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2725  }
2726  else {
2727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2728  rowPtrLen != numLocalRows + 1,
2729  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2730  " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2731  }
2732  }
2733 
2734  if(debug_) {
2735  using exec_space = typename local_graph_device_type::execution_space;
2736  int columnsOutOfBounds = 0;
2737  local_ordinal_type numLocalCols = this->getLocalNumCols();
2738  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2739  KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2740  {
2741  if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2742  lOutOfBounds++;
2743  }, columnsOutOfBounds);
2744  int globalColsOutOfBounds= 0;
2745  auto comm = this->getComm();
2746  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2747  Teuchos::outArg (globalColsOutOfBounds));
2748  if (globalColsOutOfBounds)
2749  {
2750  std::string message;
2751  if (columnsOutOfBounds)
2752  {
2753  //Only print message from ranks with the problem
2754  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2755  }
2756  Details::gathervPrint(std::cout, message, *comm);
2757  throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2758  }
2759  }
2760 
2761  if (debug_ && this->isSorted()) {
2762  // Verify that the local indices are actually sorted
2763  int notSorted = 0;
2764  using exec_space = typename local_graph_device_type::execution_space;
2765  using size_type = typename local_graph_device_type::size_type;
2766  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2767  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2768  {
2769  size_type rowBegin = rowPointers(i);
2770  size_type rowEnd = rowPointers(i + 1);
2771  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2772  {
2773  if(columnIndices(j - 1) > columnIndices(j))
2774  {
2775  lNotSorted = 1;
2776  }
2777  }
2778  }, notSorted);
2779  //All-reduce notSorted to avoid rank divergence
2780  int globalNotSorted = 0;
2781  auto comm = this->getComm();
2782  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2783  Teuchos::outArg (globalNotSorted));
2784  if (globalNotSorted)
2785  {
2786  std::string message;
2787  if (notSorted)
2788  {
2789  //Only print message from ranks with the problem
2790  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2791  }
2792  Details::gathervPrint(std::cout, message, *comm);
2793  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2794  }
2795  }
2796 
2797  indicesAreAllocated_ = true;
2798  indicesAreLocal_ = true;
2799  indicesAreSorted_ = true;
2800  noRedundancies_ = true;
2801  lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2802  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2803  setRowPtrs(rowPointers);
2804 
2805  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2806 
2807  // Storage MUST be packed, since the interface doesn't give any
2808  // way to indicate any extra space at the end of each row.
2809  storageStatus_ = Details::STORAGE_1D_PACKED;
2810 
2811  // These normally get cleared out at the end of allocateIndices.
2812  // It makes sense to clear them out here, because at the end of
2813  // this method, the graph is allocated on the calling process.
2814  numAllocForAllRows_ = 0;
2815  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2816 
2817  checkInternalState ();
2818  }
2819 
2820 
2821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2822  void
2824  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2825  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2826  {
2827  using Kokkos::View;
2828  typedef typename local_graph_device_type::row_map_type row_map_type;
2829  typedef typename row_map_type::array_layout layout_type;
2830  typedef typename row_map_type::non_const_value_type row_offset_type;
2831  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2832  Kokkos::MemoryUnmanaged> input_view_type;
2833  typedef typename row_map_type::non_const_type nc_row_map_type;
2834 
2835  const size_t size = static_cast<size_t> (rowPointers.size ());
2836  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2837  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2838 
2839  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2840 
2841  // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
2842 #ifdef KOKKOS_ENABLE_CXX17
2843  if constexpr (same) { // size_t == row_offset_type
2844  using lexecution_space = typename device_type::execution_space;
2845  Kokkos::deep_copy (lexecution_space(),
2846  ptr_rot,
2847  ptr_in);
2848  }
2849 #else
2850  if (same) { // size_t == row_offset_type
2851  // This compile-time logic ensures that the compiler never sees
2852  // an assignment of View<row_offset_type*, ...> to View<size_t*,
2853  // ...> unless size_t == row_offset_type.
2854  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2855  // DEEP_COPY REVIEW - HOST-TO-DEVICE
2857  Kokkos::Impl::if_c<same,
2858  nc_row_map_type,
2859  input_view_type>::select (ptr_rot, ptr_decoy),
2860  ptr_in);
2861  }
2862 #endif
2863  else { // size_t != row_offset_type
2864  // CudaUvmSpace != HostSpace, so this will be false in that case.
2865  constexpr bool inHostMemory =
2866  std::is_same<typename row_map_type::memory_space,
2867  Kokkos::HostSpace>::value;
2868  if (inHostMemory) {
2869  // Copy (with cast from size_t to row_offset_type, with bounds
2870  // checking if necessary) to ptr_rot.
2871  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2872  }
2873  else { // Copy input row offsets to device first.
2874  //
2875  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2876  // execution space would avoid the double copy.
2877  //
2878  View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2879 
2880  // DEEP_COPY REVIEW - NOT TESTED
2881  Kokkos::deep_copy (ptr_st, ptr_in);
2882  // Copy on device (casting from size_t to row_offset_type,
2883  // with bounds checking if necessary) to ptr_rot. This
2884  // executes in the output View's execution space, which is the
2885  // same as execution_space.
2886  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2887  }
2888  }
2889 
2890  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2891  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2892  setAllIndices (ptr_rot, k_ind);
2893  }
2894 
2895 
2896  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2897  void
2900  {
2901  using Teuchos::Comm;
2902  using Teuchos::outArg;
2903  using Teuchos::RCP;
2904  using Teuchos::rcp;
2905  using Teuchos::REDUCE_MAX;
2906  using Teuchos::REDUCE_MIN;
2907  using Teuchos::reduceAll;
2908  using std::endl;
2909  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2910  using LO = local_ordinal_type;
2911  using GO = global_ordinal_type;
2912  using size_type = typename Teuchos::Array<GO>::size_type;
2913  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2914 
2915  std::unique_ptr<std::string> prefix;
2916  if (verbose_) {
2917  prefix = this->createPrefix("CrsGraph", "globalAssemble");
2918  std::ostringstream os;
2919  os << *prefix << "Start" << endl;
2920  std::cerr << os.str();
2921  }
2922  RCP<const Comm<int> > comm = getComm ();
2923 
2924  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2925  (! isFillActive (), std::runtime_error, "Fill must be active before "
2926  "you may call this method.");
2927 
2928  const size_t myNumNonlocalRows = this->nonlocals_.size ();
2929 
2930  // If no processes have nonlocal rows, then we don't have to do
2931  // anything. Checking this is probably cheaper than constructing
2932  // the Map of nonlocal rows (see below) and noticing that it has
2933  // zero global entries.
2934  {
2935  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2936  int someoneHasNonlocalRows = 0;
2937  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2938  outArg (someoneHasNonlocalRows));
2939  if (someoneHasNonlocalRows == 0) {
2940  if (verbose_) {
2941  std::ostringstream os;
2942  os << *prefix << "Done: No nonlocal rows" << endl;
2943  std::cerr << os.str();
2944  }
2945  return;
2946  }
2947  else if (verbose_) {
2948  std::ostringstream os;
2949  os << *prefix << "At least 1 process has nonlocal rows"
2950  << endl;
2951  std::cerr << os.str();
2952  }
2953  }
2954 
2955  // 1. Create a list of the "nonlocal" rows on each process. this
2956  // requires iterating over nonlocals_, so while we do this,
2957  // deduplicate the entries and get a count for each nonlocal
2958  // row on this process.
2959  // 2. Construct a new row Map corresponding to those rows. This
2960  // Map is likely overlapping. We know that the Map is not
2961  // empty on all processes, because the above all-reduce and
2962  // return exclude that case.
2963 
2964  RCP<const map_type> nonlocalRowMap;
2965  // Keep this for CrsGraph's constructor.
2966  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2967  {
2968  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2969  size_type curPos = 0;
2970  for (auto mapIter = this->nonlocals_.begin ();
2971  mapIter != this->nonlocals_.end ();
2972  ++mapIter, ++curPos) {
2973  myNonlocalGblRows[curPos] = mapIter->first;
2974  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2975  std::sort (gblCols.begin (), gblCols.end ());
2976  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2977  gblCols.erase (vecLast, gblCols.end ());
2978  numEntPerNonlocalRow[curPos] = gblCols.size ();
2979  }
2980 
2981  // Currently, Map requires that its indexBase be the global min
2982  // of all its global indices. Map won't compute this for us, so
2983  // we must do it. If our process has no nonlocal rows, set the
2984  // "min" to the max possible GO value. This ensures that if
2985  // some process has at least one nonlocal row, then it will pick
2986  // that up as the min. We know that at least one process has a
2987  // nonlocal row, since the all-reduce and return at the top of
2988  // this method excluded that case.
2989  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2990  {
2991  auto iter = std::min_element (myNonlocalGblRows.begin (),
2992  myNonlocalGblRows.end ());
2993  if (iter != myNonlocalGblRows.end ()) {
2994  myMinNonlocalGblRow = *iter;
2995  }
2996  }
2997  GO gblMinNonlocalGblRow = 0;
2998  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2999  outArg (gblMinNonlocalGblRow));
3000  const GO indexBase = gblMinNonlocalGblRow;
3001  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3002  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3003  }
3004 
3005  if (verbose_) {
3006  std::ostringstream os;
3007  os << *prefix << "nonlocalRowMap->getIndexBase()="
3008  << nonlocalRowMap->getIndexBase() << endl;
3009  std::cerr << os.str();
3010  }
3011 
3012  // 3. Use the column indices for each nonlocal row, as stored in
3013  // nonlocals_, to construct a CrsGraph corresponding to
3014  // nonlocal rows. We need, but we have, exact counts of the
3015  // number of entries in each nonlocal row.
3016 
3017  RCP<crs_graph_type> nonlocalGraph =
3018  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
3019  {
3020  size_type curPos = 0;
3021  for (auto mapIter = this->nonlocals_.begin ();
3022  mapIter != this->nonlocals_.end ();
3023  ++mapIter, ++curPos) {
3024  const GO gblRow = mapIter->first;
3025  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3026  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3027  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3028  }
3029  }
3030  if (verbose_) {
3031  std::ostringstream os;
3032  os << *prefix << "Built nonlocal graph" << endl;
3033  std::cerr << os.str();
3034  }
3035  // There's no need to fill-complete the nonlocals graph.
3036  // We just use it as a temporary container for the Export.
3037 
3038  // 4. If the original row Map is one to one, then we can Export
3039  // directly from nonlocalGraph into this. Otherwise, we have
3040  // to create a temporary graph with a one-to-one row Map,
3041  // Export into that, then Import from the temporary graph into
3042  // *this.
3043 
3044  auto origRowMap = this->getRowMap ();
3045  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3046 
3047  if (origRowMapIsOneToOne) {
3048  if (verbose_) {
3049  std::ostringstream os;
3050  os << *prefix << "Original row Map is 1-to-1" << endl;
3051  std::cerr << os.str();
3052  }
3053  export_type exportToOrig (nonlocalRowMap, origRowMap);
3054  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3055  // We're done at this point!
3056  }
3057  else {
3058  if (verbose_) {
3059  std::ostringstream os;
3060  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3061  std::cerr << os.str();
3062  }
3063  // If you ask a Map whether it is one to one, it does some
3064  // communication and stashes intermediate results for later use
3065  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3066  // much more then the original cost of calling isOneToOne.
3067  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3068  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3069 
3070  // Create a temporary graph with the one-to-one row Map.
3071  //
3072  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3073  // row, to avoid reallocation during the Export operation.
3074  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3075 
3076  // Export from graph of nonlocals into the temp one-to-one graph.
3077  if (verbose_) {
3078  std::ostringstream os;
3079  os << *prefix << "Export nonlocal graph" << endl;
3080  std::cerr << os.str();
3081  }
3082  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3083 
3084  // We don't need the graph of nonlocals anymore, so get rid of
3085  // it, to keep the memory high-water mark down.
3086  nonlocalGraph = Teuchos::null;
3087 
3088  // Import from the one-to-one graph to the original graph.
3089  import_type importToOrig (oneToOneRowMap, origRowMap);
3090  if (verbose_) {
3091  std::ostringstream os;
3092  os << *prefix << "Import nonlocal graph" << endl;
3093  std::cerr << os.str();
3094  }
3095  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3096  }
3097 
3098  // It's safe now to clear out nonlocals_, since we've already
3099  // committed side effects to *this. The standard idiom for
3100  // clearing a Container like std::map, is to swap it with an empty
3101  // Container and let the swapped Container fall out of scope.
3102  decltype (this->nonlocals_) newNonlocals;
3103  std::swap (this->nonlocals_, newNonlocals);
3104 
3105  checkInternalState ();
3106  if (verbose_) {
3107  std::ostringstream os;
3108  os << *prefix << "Done" << endl;
3109  std::cerr << os.str();
3110  }
3111  }
3112 
3113 
3114  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3115  void
3117  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3118  {
3119  clearGlobalConstants();
3120  if (params != Teuchos::null) this->setParameterList (params);
3121  // either still sorted/merged or initially sorted/merged
3122  indicesAreSorted_ = true;
3123  noRedundancies_ = true;
3124  fillComplete_ = false;
3125  }
3126 
3127 
3128  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3129  void
3131  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3132  {
3133  // If the graph already has domain and range Maps, don't clobber
3134  // them. If it doesn't, use the current row Map for both the
3135  // domain and range Maps.
3136  //
3137  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3138  // column Map, and column indices are inserted which are not in
3139  // the row Map on any process, this will cause troubles. However,
3140  // that is not a common case for most applications that we
3141  // encounter, and checking for it might require more
3142  // communication.
3143  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3144  if (domMap.is_null ()) {
3145  domMap = this->getRowMap ();
3146  }
3147  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3148  if (ranMap.is_null ()) {
3149  ranMap = this->getRowMap ();
3150  }
3151  this->fillComplete (domMap, ranMap, params);
3152  }
3153 
3154 
3155  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3156  void
3158  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3159  const Teuchos::RCP<const map_type>& rangeMap,
3160  const Teuchos::RCP<Teuchos::ParameterList>& params)
3161  {
3162  using std::endl;
3163  const char tfecfFuncName[] = "fillComplete: ";
3164  const bool verbose = verbose_;
3165 
3166  std::unique_ptr<std::string> prefix;
3167  if (verbose) {
3168  prefix = this->createPrefix("CrsGraph", "fillComplete");
3169  std::ostringstream os;
3170  os << *prefix << "Start" << endl;
3171  std::cerr << os.str();
3172  }
3173 
3174  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3175  (! isFillActive () || isFillComplete (), std::runtime_error,
3176  "Graph fill state must be active (isFillActive() "
3177  "must be true) before calling fillComplete().");
3178 
3179  const int numProcs = getComm ()->getSize ();
3180 
3181  //
3182  // Read and set parameters
3183  //
3184 
3185  // Does the caller want to sort remote GIDs (within those owned by
3186  // the same process) in makeColMap()?
3187  if (! params.is_null ()) {
3188  if (params->isParameter ("sort column map ghost gids")) {
3189  sortGhostsAssociatedWithEachProcessor_ =
3190  params->get<bool> ("sort column map ghost gids",
3191  sortGhostsAssociatedWithEachProcessor_);
3192  }
3193  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3194  sortGhostsAssociatedWithEachProcessor_ =
3195  params->get<bool> ("Sort column Map ghost GIDs",
3196  sortGhostsAssociatedWithEachProcessor_);
3197  }
3198  }
3199 
3200  // If true, the caller promises that no process did nonlocal
3201  // changes since the last call to fillComplete.
3202  bool assertNoNonlocalInserts = false;
3203  if (! params.is_null ()) {
3204  assertNoNonlocalInserts =
3205  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3206  }
3207 
3208  //
3209  // Allocate indices, if they haven't already been allocated
3210  //
3211  if (! indicesAreAllocated ()) {
3212  if (hasColMap ()) {
3213  // We have a column Map, so use local indices.
3214  allocateIndices (LocalIndices, verbose);
3215  } else {
3216  // We don't have a column Map, so use global indices.
3217  allocateIndices (GlobalIndices, verbose);
3218  }
3219  }
3220 
3221  //
3222  // Do global assembly, if requested and if the communicator
3223  // contains more than one process.
3224  //
3225  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3226  if (mayNeedGlobalAssemble) {
3227  // This first checks if we need to do global assembly.
3228  // The check costs a single all-reduce.
3229  globalAssemble ();
3230  }
3231  else {
3232  const size_t numNonlocals = nonlocals_.size();
3233  if (verbose) {
3234  std::ostringstream os;
3235  os << *prefix << "Do not need to call globalAssemble; "
3236  "assertNoNonlocalInserts="
3237  << (assertNoNonlocalInserts ? "true" : "false")
3238  << "numProcs=" << numProcs
3239  << ", nonlocals_.size()=" << numNonlocals << endl;
3240  std::cerr << os.str();
3241  }
3242  const int lclNeededGlobalAssemble =
3243  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3244  if (lclNeededGlobalAssemble != 0 && verbose) {
3245  std::ostringstream os;
3246  os << *prefix;
3247  Details::Impl::verbosePrintMap(
3248  os, nonlocals_.begin(), nonlocals_.end(),
3249  nonlocals_.size(), "nonlocals_");
3250  std::cerr << os.str() << endl;
3251  }
3252 
3253  if (debug_) {
3254  auto map = this->getMap();
3255  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3256  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3257  if (! comm.is_null()) {
3258  using Teuchos::REDUCE_MAX;
3259  using Teuchos::reduceAll;
3260  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3261  Teuchos::outArg(gblNeededGlobalAssemble));
3262  }
3263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3264  (gblNeededGlobalAssemble != 0, std::runtime_error,
3265  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3266  "least one process in the CrsGraph's communicator. This "
3267  "means either that you incorrectly set the "
3268  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3269  "or that you inserted invalid entries. "
3270  "Rerun with the environment variable TPETRA_VERBOSE="
3271  "CrsGraph set to see the entries of nonlocals_ on every "
3272  "MPI process (WARNING: lots of output).");
3273  }
3274  else {
3275  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3276  (lclNeededGlobalAssemble != 0, std::runtime_error,
3277  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3278  "calling process. This means either that you incorrectly "
3279  "set the \"No Nonlocal Changes\" fillComplete parameter "
3280  "to true, or that you inserted invalid entries. "
3281  "Rerun with the environment "
3282  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3283  "of nonlocals_ on every MPI process (WARNING: lots of "
3284  "output).");
3285  }
3286  }
3287 
3288  // Set domain and range Map. This may clear the Import / Export
3289  // objects if the new Maps differ from any old ones.
3290  setDomainRangeMaps (domainMap, rangeMap);
3291 
3292  // If the graph does not already have a column Map (either from
3293  // the user constructor calling the version of the constructor
3294  // that takes a column Map, or from a previous fillComplete call),
3295  // then create it.
3296  Teuchos::Array<int> remotePIDs (0);
3297  const bool mustBuildColMap = ! this->hasColMap ();
3298  if (mustBuildColMap) {
3299  this->makeColMap (remotePIDs); // resized on output
3300  }
3301 
3302  // Make indices local, if they aren't already.
3303  // The method doesn't do any work if the indices are already local.
3304  const std::pair<size_t, std::string> makeIndicesLocalResult =
3305  this->makeIndicesLocal(verbose);
3306 
3307  if (debug_) {
3308  using Details::gathervPrint;
3309  using Teuchos::RCP;
3310  using Teuchos::REDUCE_MIN;
3311  using Teuchos::reduceAll;
3312  using Teuchos::outArg;
3313 
3314  RCP<const map_type> map = this->getMap ();
3315  RCP<const Teuchos::Comm<int> > comm;
3316  if (! map.is_null ()) {
3317  comm = map->getComm ();
3318  }
3319  if (comm.is_null ()) {
3320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3321  (makeIndicesLocalResult.first != 0, std::runtime_error,
3322  makeIndicesLocalResult.second);
3323  }
3324  else {
3325  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3326  int gblSuccess = 0; // output argument
3327  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3328  if (gblSuccess != 1) {
3329  std::ostringstream os;
3330  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3331  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3332  (true, std::runtime_error, os.str ());
3333  }
3334  }
3335  }
3336  else {
3337  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3338  // the error state to makeImportExport or
3339  // computeGlobalConstants, which may do all-reduces and thus may
3340  // have the opportunity to communicate that error state.
3341  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3342  (makeIndicesLocalResult.first != 0, std::runtime_error,
3343  makeIndicesLocalResult.second);
3344  }
3345 
3346  // If this process has no indices, then CrsGraph considers it
3347  // already trivially sorted and merged. Thus, this method need
3348  // not be called on all processes in the row Map's communicator.
3349  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3350 
3351  // Make Import and Export objects, if they haven't been made
3352  // already. If we made a column Map above, reuse information from
3353  // that process to avoid communiation in the Import setup.
3354  this->makeImportExport (remotePIDs, mustBuildColMap);
3355 
3356  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3357  this->fillLocalGraph (params);
3358 
3359  const bool callComputeGlobalConstants = params.get () == nullptr ||
3360  params->get ("compute global constants", true);
3361  if (callComputeGlobalConstants) {
3362  this->computeGlobalConstants ();
3363  }
3364  else {
3365  this->computeLocalConstants ();
3366  }
3367  this->fillComplete_ = true;
3368  this->checkInternalState ();
3369 
3370  if (verbose) {
3371  std::ostringstream os;
3372  os << *prefix << "Done" << endl;
3373  std::cerr << os.str();
3374  }
3375  }
3376 
3377 
3378  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3379  void
3381  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3382  const Teuchos::RCP<const map_type>& rangeMap,
3383  const Teuchos::RCP<const import_type>& importer,
3384  const Teuchos::RCP<const export_type>& exporter,
3385  const Teuchos::RCP<Teuchos::ParameterList>& params)
3386  {
3387  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3388 #ifdef HAVE_TPETRA_MMM_TIMINGS
3389  std::string label;
3390  if(!params.is_null())
3391  label = params->get("Timer Label",label);
3392  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3393  using Teuchos::TimeMonitor;
3394  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3395 #endif
3396 
3397 
3398  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3399  domainMap.is_null () || rangeMap.is_null (),
3400  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3401  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3402  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3403  "call this method unless the graph has a column Map.");
3404  auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedHost().extent (0);
3405  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3406  getLocalNumRows () > 0 && rowPtrsUnpackedLength == 0,
3407  std::runtime_error, "The calling process has getLocalNumRows() = "
3408  << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3409  "been set.");
3410  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3411  static_cast<size_t> (rowPtrsUnpackedLength) != getLocalNumRows () + 1,
3412  std::runtime_error, "The row offsets array has length " <<
3413  rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " <<
3414  (getLocalNumRows () + 1) << ".");
3415 
3416  // Note: We don't need to do the following things which are normally done in fillComplete:
3417  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3418 
3419  // Constants from allocateIndices
3420  //
3421  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3422  // away once the graph is allocated. expertStaticFillComplete
3423  // either presumes that the graph is allocated, or "allocates" it.
3424  //
3425  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3426  // version of CrsGraph is to allocate in the constructor, not
3427  // lazily on first insert. That will make both
3428  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3429  numAllocForAllRows_ = 0;
3430  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3431  indicesAreAllocated_ = true;
3432 
3433  // Constants from makeIndicesLocal
3434  //
3435  // The graph has a column Map, so its indices had better be local.
3436  indicesAreLocal_ = true;
3437  indicesAreGlobal_ = false;
3438 
3439  // set domain/range map: may clear the import/export objects
3440 #ifdef HAVE_TPETRA_MMM_TIMINGS
3441  MM = Teuchos::null;
3442  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3443 #endif
3444  setDomainRangeMaps (domainMap, rangeMap);
3445 
3446  // Presume the user sorted and merged the arrays first
3447  indicesAreSorted_ = true;
3448  noRedundancies_ = true;
3449 
3450  // makeImportExport won't create a new importer/exporter if I set one here first.
3451 #ifdef HAVE_TPETRA_MMM_TIMINGS
3452  MM = Teuchos::null;
3453  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3454 #endif
3455 
3456  importer_ = Teuchos::null;
3457  exporter_ = Teuchos::null;
3458  if (importer != Teuchos::null) {
3459  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3460  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3461  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3462  std::invalid_argument,": importer does not match matrix maps.");
3463  importer_ = importer;
3464 
3465  }
3466 
3467 #ifdef HAVE_TPETRA_MMM_TIMINGS
3468  MM = Teuchos::null;
3469  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3470 #endif
3471 
3472  if (exporter != Teuchos::null) {
3473  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3474  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3475  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3476  std::invalid_argument,": exporter does not match matrix maps.");
3477  exporter_ = exporter;
3478  }
3479 
3480 #ifdef HAVE_TPETRA_MMM_TIMINGS
3481  MM = Teuchos::null;
3482  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3483 #endif
3484  Teuchos::Array<int> remotePIDs (0); // unused output argument
3485  this->makeImportExport (remotePIDs, false);
3486 
3487 #ifdef HAVE_TPETRA_MMM_TIMINGS
3488  MM = Teuchos::null;
3489  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3490 #endif
3491  this->fillLocalGraph (params);
3492 
3493  const bool callComputeGlobalConstants = params.get () == nullptr ||
3494  params->get ("compute global constants", true);
3495 
3496  if (callComputeGlobalConstants) {
3497 #ifdef HAVE_TPETRA_MMM_TIMINGS
3498  MM = Teuchos::null;
3499  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3500 #endif // HAVE_TPETRA_MMM_TIMINGS
3501  this->computeGlobalConstants ();
3502  }
3503  else {
3504 #ifdef HAVE_TPETRA_MMM_TIMINGS
3505  MM = Teuchos::null;
3506  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3507 #endif // HAVE_TPETRA_MMM_TIMINGS
3508  this->computeLocalConstants ();
3509  }
3510 
3511  fillComplete_ = true;
3512 
3513 #ifdef HAVE_TPETRA_MMM_TIMINGS
3514  MM = Teuchos::null;
3515  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3516 #endif
3517  checkInternalState ();
3518  }
3519 
3520 
3521  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3522  void
3524  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3525  {
3527  typedef typename local_graph_device_type::row_map_type row_map_type;
3528  typedef typename row_map_type::non_const_type non_const_row_map_type;
3529  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3530  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3531  "expertStaticFillComplete): ";
3532  const size_t lclNumRows = this->getLocalNumRows ();
3533 
3534  // This method's goal is to fill in the two arrays (compressed
3535  // sparse row format) that define the sparse graph's structure.
3536 
3537  bool requestOptimizedStorage = true;
3538  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3539  requestOptimizedStorage = false;
3540  }
3541 
3542  // The graph's column indices are currently stored in a 1-D
3543  // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3544  // in k_lclInds1D_.
3545 
3546  if (debug_) {
3547  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3548  // The graph's array of row offsets must already be allocated.
3549  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3550  (rowPtrsUnpacked.extent (0) == 0, std::logic_error,
3551  "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3552  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3553  (rowPtrsUnpacked.extent (0) != lclNumRows + 1, std::logic_error,
3554  "rowPtrsUnpacked_host_.extent(0) = "
3555  << rowPtrsUnpacked.extent (0) << " != (lclNumRows + 1) = "
3556  << (lclNumRows + 1) << ".");
3557  const size_t numOffsets = rowPtrsUnpacked.extent (0);
3558  const auto valToCheck = rowPtrsUnpacked(numOffsets-1);
3559  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560  (numOffsets != 0 &&
3561  lclIndsUnpacked_wdv.extent (0) != valToCheck,
3562  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3563  " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3564  << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck
3565  << ".");
3566  }
3567 
3568  size_t allocSize = 0;
3569  try {
3570  allocSize = this->getLocalAllocationSize ();
3571  }
3572  catch (std::logic_error& e) {
3573  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3574  (true, std::logic_error, "getLocalAllocationSize threw "
3575  "std::logic_error: " << e.what ());
3576  }
3577  catch (std::runtime_error& e) {
3578  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3579  (true, std::runtime_error, "getLocalAllocationSize threw "
3580  "std::runtime_error: " << e.what ());
3581  }
3582  catch (std::exception& e) {
3583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584  (true, std::runtime_error, "getLocalAllocationSize threw "
3585  "std::exception: " << e.what ());
3586  }
3587  catch (...) {
3588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3589  (true, std::runtime_error, "getLocalAllocationSize threw "
3590  "an exception not a subclass of std::exception.");
3591  }
3592 
3593  if (this->getLocalNumEntries () != allocSize) {
3594  // Use the nonconst version of row_map_type for ptr_d, because
3595  // the latter is const and we need to modify ptr_d here.
3596  non_const_row_map_type ptr_d;
3597  row_map_type ptr_d_const;
3598 
3599  // The graph's current 1-D storage is "unpacked." This means
3600  // the row offsets may differ from what the final row offsets
3601  // should be. This could happen, for example, if the user set
3602  // an upper bound on the number of entries in each row, but
3603  // didn't fill all those entries.
3604 
3605  if (debug_) {
3606  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3607  if (rowPtrsUnpacked.extent (0) != 0) {
3608  const size_t numOffsets =
3609  static_cast<size_t> (rowPtrsUnpacked.extent (0));
3610  const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3612  (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3613  std::logic_error, "(Unpacked branch) Before allocating "
3614  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3615  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3616  << lclIndsUnpacked_wdv.extent (0) << ".");
3617  }
3618  }
3619 
3620  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3621  // array of valid entry counts per row (k_numRowEntries_).
3622 
3623  // Total number of entries in the matrix on the calling
3624  // process. We will compute this in the loop below. It's
3625  // cheap to compute and useful as a sanity check.
3626  size_t lclTotalNumEntries = 0;
3627  {
3628  // Allocate the packed row offsets array.
3629  ptr_d =
3630  non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3631  ptr_d_const = ptr_d;
3632 
3633  // It's ok that k_numRowEntries_ is a host View; the
3634  // function can handle this.
3635  typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3636  if (debug_) {
3637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3638  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3639  std::logic_error, "(Unpacked branch) "
3640  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3641  << " != getLocalNumRows()=" << lclNumRows << "");
3642  }
3643 
3644  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3645 
3646  if (debug_) {
3647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3648  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3649  std::logic_error, "(Unpacked branch) After allocating "
3650  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3651  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3652  const auto valToCheck =
3653  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3655  (valToCheck != lclTotalNumEntries, std::logic_error,
3656  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3657  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3658  << ") = " << valToCheck << " != total number of entries "
3659  "on the calling process = " << lclTotalNumEntries
3660  << ".");
3661  }
3662  }
3663 
3664  // Allocate the array of packed column indices.
3665  lclinds_1d_type ind_d =
3666  lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3667 
3668  // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3669  // them, using the packed row offsets array ptr_d that we
3670  // created above.
3671  //
3672  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3673  // CrsMatrix?), we need to keep around the unpacked row
3674  // offsets and column indices.
3675 
3676  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3677  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3678  typedef pack_functor<
3679  typename local_graph_device_type::entries_type::non_const_type,
3680  typename local_inds_dualv_type::t_dev::const_type,
3681  row_map_type,
3682  typename local_graph_device_type::row_map_type> inds_packer_type;
3683  inds_packer_type f (ind_d,
3684  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3685  ptr_d, this->getRowPtrsUnpackedDevice());
3686  {
3687  typedef typename decltype (ind_d)::execution_space exec_space;
3688  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3689  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3690  }
3691 
3692  if (debug_) {
3693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3694  (ptr_d.extent (0) == 0, std::logic_error,
3695  "(\"Optimize Storage\"=true branch) After packing, "
3696  "ptr_d.extent(0)=0.");
3697  if (ptr_d.extent (0) != 0) {
3698  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3699  const auto valToCheck =
3700  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3701  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3702  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3703  std::logic_error, "(\"Optimize Storage\"=true branch) "
3704  "After packing, ptr_d(" << (numOffsets-1) << ")="
3705  << valToCheck << " != ind_d.extent(0)="
3706  << ind_d.extent(0) << ".");
3707  }
3708  }
3709  // Build the local graph.
3710  if (requestOptimizedStorage)
3711  setRowPtrs(ptr_d_const);
3712  else
3713  setRowPtrsPacked(ptr_d_const);
3714  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3715  }
3716  else { // We don't have to pack, so just set the pointers.
3717  //Set both packed and unpacked rowptrs to this
3718  this->setRowPtrs(rowPtrsUnpacked_dev_);
3719  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3720 
3721  if (debug_) {
3722  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3723  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3724  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3725  (rowPtrsPacked_dev.extent (0) == 0, std::logic_error,
3726  "(\"Optimize Storage\"=false branch) "
3727  "rowPtrsPacked_dev_.extent(0) = 0.");
3728  if (rowPtrsPacked_dev.extent (0) != 0) {
3729  const size_t numOffsets =
3730  static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3731  const size_t valToCheck =
3732  rowPtrsPacked_host(numOffsets - 1);
3733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3734  (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3735  std::logic_error, "(\"Optimize Storage\"=false branch) "
3736  "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3737  << valToCheck
3738  << " != lclIndsPacked_wdv.extent(0)="
3739  << lclIndsPacked_wdv.extent (0) << ".");
3740  }
3741  }
3742  }
3743 
3744  if (debug_) {
3745  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3746  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3747  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3748  (static_cast<size_t> (rowPtrsPacked_dev.extent (0)) != lclNumRows + 1,
3749  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3750  rowPtrsPacked_dev.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3751  << ".");
3752  if (rowPtrsPacked_dev.extent (0) != 0) {
3753  const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3754  const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3755  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3756  (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3757  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3758  << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3759  << lclIndsPacked_wdv.extent (0) << ".");
3760  }
3761  }
3762 
3763  if (requestOptimizedStorage) {
3764  // With optimized storage, we don't need to store
3765  // the array of row entry counts.
3766 
3767  // Free graph data structures that are only needed for
3768  // unpacked 1-D storage.
3769  k_numRowEntries_ = num_row_entries_type ();
3770 
3771  // Keep the new 1-D packed allocations.
3772  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3773 
3774  storageStatus_ = Details::STORAGE_1D_PACKED;
3775  }
3776 
3777  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3778  }
3779 
3780  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3781  void
3783  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3784  {
3785  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3786  //
3787  // FIXME (mfh 18 Aug 2014) This will break if the calling process
3788  // has no entries, because in that case, currently it is neither
3789  // locally nor globally indexed. This will change once we get rid
3790  // of lazy allocation (so that the constructor allocates indices
3791  // and therefore commits to local vs. global).
3792  const char tfecfFuncName[] = "replaceColMap: ";
3793  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3794  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3795  "Requires matching maps and non-static graph.");
3796  colMap_ = newColMap;
3797  }
3798 
3799  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3800  void
3802  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3803  const Teuchos::RCP<const import_type>& newImport,
3804  const bool sortIndicesInEachRow)
3805  {
3806  using Teuchos::REDUCE_MIN;
3807  using Teuchos::reduceAll;
3808  using Teuchos::RCP;
3809  typedef GlobalOrdinal GO;
3810  typedef LocalOrdinal LO;
3811  typedef typename local_inds_dualv_type::t_host col_inds_type;
3812  const char tfecfFuncName[] = "reindexColumns: ";
3813 
3814  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3815  isFillComplete (), std::runtime_error, "The graph is fill complete "
3816  "(isFillComplete() returns true). You must call resumeFill() before "
3817  "you may call this method.");
3818 
3819  // mfh 19 Aug 2014: This method does NOT redistribute data; it
3820  // doesn't claim to do the work of an Import or Export. This
3821  // means that for all processes, the calling process MUST own all
3822  // column indices, in both the old column Map (if it exists) and
3823  // the new column Map. We check this via an all-reduce.
3824  //
3825  // Some processes may be globally indexed, others may be locally
3826  // indexed, and others (that have no graph entries) may be
3827  // neither. This method will NOT change the graph's current
3828  // state. If it's locally indexed, it will stay that way, and
3829  // vice versa. It would easy to add an option to convert indices
3830  // from global to local, so as to save a global-to-local
3831  // conversion pass. However, we don't do this here. The intended
3832  // typical use case is that the graph already has a column Map and
3833  // is locally indexed, and this is the case for which we optimize.
3834 
3835  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3836 
3837  // Attempt to convert indices to the new column Map's version of
3838  // local. This will fail if on the calling process, the graph has
3839  // indices that are not on that process in the new column Map.
3840  // After the local conversion attempt, we will do an all-reduce to
3841  // see if any processes failed.
3842 
3843  // If this is false, then either the graph contains a column index
3844  // which is invalid in the CURRENT column Map, or the graph is
3845  // locally indexed but currently has no column Map. In either
3846  // case, there is no way to convert the current local indices into
3847  // global indices, so that we can convert them into the new column
3848  // Map's local indices. It's possible for this to be true on some
3849  // processes but not others, due to replaceColMap.
3850  bool allCurColIndsValid = true;
3851  // On the calling process, are all valid current column indices
3852  // also in the new column Map on the calling process? In other
3853  // words, does local reindexing suffice, or should the user have
3854  // done an Import or Export instead?
3855  bool localSuffices = true;
3856 
3857  // Final arrays for the local indices. We will allocate exactly
3858  // one of these ONLY if the graph is locally indexed on the
3859  // calling process, and ONLY if the graph has one or more entries
3860  // (is not empty) on the calling process. In that case, we
3861  // allocate the first (1-D storage) if the graph has a static
3862  // profile, else we allocate the second (2-D storage).
3863  col_inds_type newLclInds1D;
3864  auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3865 
3866  // If indices aren't allocated, that means the calling process
3867  // owns no entries in the graph. Thus, there is nothing to
3868  // convert, and it trivially succeeds locally.
3869  if (indicesAreAllocated ()) {
3870  if (isLocallyIndexed ()) {
3871  if (hasColMap ()) { // locally indexed, and currently has a column Map
3872  const map_type& oldColMap = * (getColMap ());
3873  // Allocate storage for the new local indices.
3874  const size_t allocSize = this->getLocalAllocationSize ();
3875  newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3876  allocSize);
3877  // Attempt to convert the new indices locally.
3878  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3879  const RowInfo rowInfo = this->getRowInfo (lclRow);
3880  const size_t beg = rowInfo.offset1D;
3881  const size_t end = beg + rowInfo.numEntries;
3882  for (size_t k = beg; k < end; ++k) {
3883  const LO oldLclCol = oldLclInds1D(k);
3884  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3885  allCurColIndsValid = false;
3886  break; // Stop at the first invalid index
3887  }
3888  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3889 
3890  // The above conversion MUST succeed. Otherwise, the
3891  // current local index is invalid, which means that
3892  // the graph was constructed incorrectly.
3893  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3894  allCurColIndsValid = false;
3895  break; // Stop at the first invalid index
3896  }
3897  else {
3898  const LO newLclCol = newColMap->getLocalElement (gblCol);
3899  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3900  localSuffices = false;
3901  break; // Stop at the first invalid index
3902  }
3903  newLclInds1D(k) = newLclCol;
3904  }
3905  } // for each entry in the current row
3906  } // for each locally owned row
3907  }
3908  else { // locally indexed, but no column Map
3909  // This case is only possible if replaceColMap() was called
3910  // with a null argument on the calling process. It's
3911  // possible, but it means that this method can't possibly
3912  // succeed, since we have no way of knowing how to convert
3913  // the current local indices to global indices.
3914  allCurColIndsValid = false;
3915  }
3916  }
3917  else { // globally indexed
3918  // If the graph is globally indexed, we don't need to save
3919  // local indices, but we _do_ need to know whether the current
3920  // global indices are valid in the new column Map. We may
3921  // need to do a getRemoteIndexList call to find this out.
3922  //
3923  // In this case, it doesn't matter whether the graph currently
3924  // has a column Map. We don't need the old column Map to
3925  // convert from global indices to the _new_ column Map's local
3926  // indices. Furthermore, we can use the same code, whether
3927  // the graph is static or dynamic profile.
3928 
3929  // Test whether the current global indices are in the new
3930  // column Map on the calling process.
3931  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3932  const RowInfo rowInfo = this->getRowInfo (lclRow);
3933  auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3934  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3935  const GO gblCol = oldGblRowView(k);
3936  if (! newColMap->isNodeGlobalElement (gblCol)) {
3937  localSuffices = false;
3938  break; // Stop at the first invalid index
3939  }
3940  } // for each entry in the current row
3941  } // for each locally owned row
3942  } // locally or globally indexed
3943  } // whether indices are allocated
3944 
3945  // Do an all-reduce to check both possible error conditions.
3946  int lclSuccess[2];
3947  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3948  lclSuccess[1] = localSuffices ? 1 : 0;
3949  int gblSuccess[2];
3950  gblSuccess[0] = 0;
3951  gblSuccess[1] = 0;
3952  RCP<const Teuchos::Comm<int> > comm =
3953  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3954  if (! comm.is_null ()) {
3955  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3956  }
3957 
3958  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3959  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3960  " The most likely reason is that the graph is locally indexed, but the "
3961  "column Map is missing (null) on some processes, due to a previous call "
3962  "to replaceColMap().");
3963 
3964  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3965  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3966  "contains column indices that are in the old column Map, but not in the "
3967  "new column Map (on that process). This method does NOT redistribute "
3968  "data; it does not claim to do the work of an Import or Export operation."
3969  " This means that for all processess, the calling process MUST own all "
3970  "column indices, in both the old column Map and the new column Map. In "
3971  "this case, you will need to do an Import or Export operation to "
3972  "redistribute data.");
3973 
3974  // Commit the results.
3975  if (isLocallyIndexed ()) {
3976  { // scope the device view; sortAndMergeAllIndices needs host
3977  typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3978  Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3979  Kokkos::WithoutInitializing),
3980  newLclInds1D.extent(0));
3981  Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3982  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3983  }
3984 
3985  // We've reindexed, so we don't know if the indices are sorted.
3986  //
3987  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3988  // since we're already going through all the indices above. We
3989  // could also sort each row in place; that way, we would only
3990  // have to make one pass over the rows.
3991  indicesAreSorted_ = false;
3992  if (sortIndicesInEachRow) {
3993  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3994  // order to call this method.
3995  //
3996  // FIXME (mfh 17 Sep 2014) This violates the strong exception
3997  // guarantee. It would be better to sort the new index arrays
3998  // before committing them.
3999  const bool sorted = false; // need to resort
4000  const bool merged = true; // no need to merge, since no dups
4001  this->sortAndMergeAllIndices (sorted, merged);
4002  }
4003  }
4004  colMap_ = newColMap;
4005 
4006  if (newImport.is_null ()) {
4007  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4008  // check whether the input Import is null on any process.
4009  //
4010  // If the domain Map hasn't been set yet, we can't compute a new
4011  // Import object. Leave it what it is; it should be null, but
4012  // it doesn't matter. If the domain Map _has_ been set, then
4013  // compute a new Import object if necessary.
4014  if (! domainMap_.is_null ()) {
4015  if (! domainMap_->isSameAs (* newColMap)) {
4016  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4017  } else {
4018  importer_ = Teuchos::null; // don't need an Import
4019  }
4020  }
4021  } else {
4022  // The caller gave us an Import object. Assume that it's valid.
4023  importer_ = newImport;
4024  }
4025  }
4026 
4027  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4028  void
4030  replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4031  {
4032  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4033  TEUCHOS_TEST_FOR_EXCEPTION(
4034  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4035  "this method unless the graph already has a column Map.");
4036  TEUCHOS_TEST_FOR_EXCEPTION(
4037  newDomainMap.is_null (), std::invalid_argument,
4038  prefix << "The new domain Map must be nonnull.");
4039 
4040  // Create a new importer, if needed
4041  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4042  if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4043  newImporter = rcp(new import_type(newDomainMap, colMap_));
4044  }
4045  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4046  }
4047 
4048  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4049  void
4051  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4052  const Teuchos::RCP<const import_type>& newImporter)
4053  {
4054  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4055  TEUCHOS_TEST_FOR_EXCEPTION(
4056  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4057  "this method unless the graph already has a column Map.");
4058  TEUCHOS_TEST_FOR_EXCEPTION(
4059  newDomainMap.is_null (), std::invalid_argument,
4060  prefix << "The new domain Map must be nonnull.");
4061 
4062  if (debug_) {
4063  if (newImporter.is_null ()) {
4064  // It's not a good idea to put expensive operations in a macro
4065  // clause, even if they are side effect - free, because macros
4066  // don't promise that they won't evaluate their arguments more
4067  // than once. It's polite for them to do so, but not required.
4068  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4069  TEUCHOS_TEST_FOR_EXCEPTION
4070  (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4071  "then the new domain Map must be the same as the current column Map.");
4072  }
4073  else {
4074  const bool colSameAsTgt =
4075  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4076  const bool newDomSameAsSrc =
4077  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4078  TEUCHOS_TEST_FOR_EXCEPTION
4079  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4080  "new Import is nonnull, then the current column Map must be the same "
4081  "as the new Import's target Map, and the new domain Map must be the "
4082  "same as the new Import's source Map.");
4083  }
4084  }
4085 
4086  domainMap_ = newDomainMap;
4087  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4088  }
4089 
4090  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4091  void
4093  replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4094  {
4095  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4096  TEUCHOS_TEST_FOR_EXCEPTION(
4097  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4098  "this method unless the graph already has a row Map.");
4099  TEUCHOS_TEST_FOR_EXCEPTION(
4100  newRangeMap.is_null (), std::invalid_argument,
4101  prefix << "The new range Map must be nonnull.");
4102 
4103  // Create a new exporter, if needed
4104  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4105  if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4106  newExporter = rcp(new export_type(rowMap_, newRangeMap));
4107  }
4108  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4109  }
4110 
4111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4112  void
4114  replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4115  const Teuchos::RCP<const export_type>& newExporter)
4116  {
4117  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4118  TEUCHOS_TEST_FOR_EXCEPTION(
4119  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4120  "this method unless the graph already has a column Map.");
4121  TEUCHOS_TEST_FOR_EXCEPTION(
4122  newRangeMap.is_null (), std::invalid_argument,
4123  prefix << "The new domain Map must be nonnull.");
4124 
4125  if (debug_) {
4126  if (newExporter.is_null ()) {
4127  // It's not a good idea to put expensive operations in a macro
4128  // clause, even if they are side effect - free, because macros
4129  // don't promise that they won't evaluate their arguments more
4130  // than once. It's polite for them to do so, but not required.
4131  const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4132  TEUCHOS_TEST_FOR_EXCEPTION
4133  (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4134  "then the new range Map must be the same as the current row Map.");
4135  }
4136  else {
4137  const bool newRangeSameAsTgt =
4138  newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4139  const bool rowSameAsSrc =
4140  rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4141  TEUCHOS_TEST_FOR_EXCEPTION
4142  (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4143  "new Export is nonnull, then the current row Map must be the same "
4144  "as the new Export's source Map, and the new range Map must be the "
4145  "same as the new Export's target Map.");
4146  }
4147  }
4148 
4149  rangeMap_ = newRangeMap;
4150  exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4151  }
4152 
4153 
4154  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4158  {
4159  return local_graph_device_type(
4160  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4161  this->getRowPtrsPackedDevice());
4162  }
4163 
4164  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4167  getLocalGraphHost () const
4168  {
4169  return local_graph_host_type(
4170  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4171  this->getRowPtrsPackedHost());
4172  }
4173 
4174  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4175  void
4178  {
4179  using ::Tpetra::Details::ProfilingRegion;
4180  using Teuchos::ArrayView;
4181  using Teuchos::outArg;
4182  using Teuchos::reduceAll;
4183  typedef global_size_t GST;
4184 
4185  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4186 
4187  this->computeLocalConstants ();
4188 
4189  // Compute global constants from local constants. Processes that
4190  // already have local constants still participate in the
4191  // all-reduces, using their previously computed values.
4192  if (! this->haveGlobalConstants_) {
4193  const Teuchos::Comm<int>& comm = * (this->getComm ());
4194  // Promote all the nodeNum* and nodeMaxNum* quantities from
4195  // size_t to global_size_t, when doing the all-reduces for
4196  // globalNum* / globalMaxNum* results.
4197  //
4198  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4199  // this in two all-reduces (one for the sum and the other for
4200  // the max), or use a custom MPI_Op that combines the sum and
4201  // the max. The latter might even be slower than two
4202  // all-reduces on modern network hardware. It would also be a
4203  // good idea to use nonblocking all-reduces (MPI 3), so that we
4204  // don't have to wait around for the first one to finish before
4205  // starting the second one.
4206  GST lcl, gbl;
4207  lcl = static_cast<GST> (this->getLocalNumEntries ());
4208 
4209  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4210  this->globalNumEntries_ = gbl;
4211 
4212  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4213  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4214  outArg (this->globalMaxNumRowEntries_));
4215  this->haveGlobalConstants_ = true;
4216  }
4217  }
4218 
4219 
4220  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4221  void
4224  {
4225  using ::Tpetra::Details::ProfilingRegion;
4226 
4227  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4228  if (this->haveLocalConstants_) {
4229  return;
4230  }
4231 
4232  // Reset local properties
4233  this->nodeMaxNumRowEntries_ =
4234  Teuchos::OrdinalTraits<size_t>::invalid();
4235 
4236  using LO = local_ordinal_type;
4237 
4238  auto ptr = this->getRowPtrsPackedDevice();
4239  const LO lclNumRows = ptr.extent(0) == 0 ?
4240  static_cast<LO> (0) :
4241  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4242 
4243  const LO lclMaxNumRowEnt =
4244  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4245  ptr, lclNumRows);
4246  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4247  this->haveLocalConstants_ = true;
4248  }
4249 
4250 
4251  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4252  std::pair<size_t, std::string>
4254  makeIndicesLocal (const bool verbose)
4255  {
4257  using Teuchos::arcp;
4258  using Teuchos::Array;
4259  using std::endl;
4260  typedef LocalOrdinal LO;
4261  typedef GlobalOrdinal GO;
4262  typedef device_type DT;
4263  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4264  typedef typename num_row_entries_type::non_const_value_type num_ent_type;
4265  const char tfecfFuncName[] = "makeIndicesLocal: ";
4266  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4267 
4268  std::unique_ptr<std::string> prefix;
4269  if (verbose) {
4270  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4271  std::ostringstream os;
4272  os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4273  std::cerr << os.str();
4274  }
4275 
4276  // These are somewhat global properties, so it's safe to have
4277  // exception checks for them, rather than returning an error code.
4278  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4279  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4280  "column Map yet. This method should never be called in that case. "
4281  "Please report this bug to the Tpetra developers.");
4282  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4283  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4284  "that it has a column Map, because hasColMap() returns true. However, "
4285  "the result of getColMap() is null. This should never happen. Please "
4286  "report this bug to the Tpetra developers.");
4287 
4288  // Return value 1: The number of column indices (counting
4289  // duplicates) that could not be converted to local indices,
4290  // because they were not in the column Map on the calling process.
4291  size_t lclNumErrs = 0;
4292  std::ostringstream errStrm; // for return value 2 (error string)
4293 
4294  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4295  const map_type& colMap = * (this->getColMap ());
4296 
4297  if (this->isGloballyIndexed () && lclNumRows != 0) {
4298  // This is a host-accessible View.
4299  typename num_row_entries_type::const_type h_numRowEnt =
4300  this->k_numRowEntries_;
4301 
4302  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
4303 
4304  // Allocate space for local indices.
4305  if (rowPtrsUnpacked_host.extent(0) == 0) {
4306  errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4307  "happen here. Please report this bug to the Tpetra developers."
4308  << endl;
4309  // Need to return early.
4310  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4311  errStrm.str ());
4312  }
4313  const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4314 
4315  // mfh 17 Dec 2016: We don't need initial zero-fill of
4316  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4317  // AllowPadding would only help for aligned access (e.g.,
4318  // for vectorization) if we also were to pad each row to the
4319  // same alignment, so we'll skip AllowPadding for now.
4320 
4321  // using Kokkos::AllowPadding;
4322  using Kokkos::view_alloc;
4323  using Kokkos::WithoutInitializing;
4324 
4325  // When giving the label as an argument to
4326  // Kokkos::view_alloc, the label must be a string and not a
4327  // char*, else the code won't compile. This is because
4328  // view_alloc also allows a raw pointer as its first
4329  // argument. See
4330  // https://github.com/kokkos/kokkos/issues/434. This is a
4331  // large allocation typically, so the overhead of creating
4332  // an std::string is minor.
4333  const std::string label ("Tpetra::CrsGraph::lclInd");
4334  if (verbose) {
4335  std::ostringstream os;
4336  os << *prefix << "(Re)allocate lclInd_wdv: old="
4337  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4338  std::cerr << os.str();
4339  }
4340 
4341  local_inds_dualv_type lclInds_dualv =
4342  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4343  numEnt);
4344  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4345 
4346  auto lclColMap = colMap.getLocalMap ();
4347  // This is a "device mirror" of the host View h_numRowEnt.
4348  //
4349  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4350  // Device instance is to use its default constructor. See the
4351  // following Kokkos issue:
4352  //
4353  // https://github.com/kokkos/kokkos/issues/442
4354  if (verbose) {
4355  std::ostringstream os;
4356  os << *prefix << "Allocate device mirror k_numRowEnt: "
4357  << h_numRowEnt.extent(0) << endl;
4358  std::cerr << os.str();
4359  }
4360  auto k_numRowEnt =
4361  Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4362 
4364  lclNumErrs =
4365  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4366  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4367  gblInds_wdv.getDeviceView(Access::ReadOnly),
4368  this->getRowPtrsUnpackedDevice(),
4369  lclColMap,
4370  k_numRowEnt);
4371  if (lclNumErrs != 0) {
4372  const int myRank = [this] () {
4373  auto map = this->getMap ();
4374  if (map.is_null ()) {
4375  return 0;
4376  }
4377  else {
4378  auto comm = map->getComm ();
4379  return comm.is_null () ? 0 : comm->getRank ();
4380  }
4381  } ();
4382  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4383  errStrm << "(Process " << myRank << ") When converting column "
4384  "indices from global to local, we encountered " << lclNumErrs
4385  << " ind" << (pluralNumErrs ? "ices" : "ex")
4386  << " that do" << (pluralNumErrs ? "es" : "")
4387  << " not live in the column Map on this process." << endl;
4388  }
4389 
4390  // We've converted column indices from global to local, so we
4391  // can deallocate the global column indices (which we know are
4392  // in 1-D storage, because the graph has static profile).
4393  if (verbose) {
4394  std::ostringstream os;
4395  os << *prefix << "Free gblInds_wdv: "
4396  << gblInds_wdv.extent(0) << endl;
4397  std::cerr << os.str();
4398  }
4399  gblInds_wdv = global_inds_wdv_type ();
4400  } // globallyIndexed() && lclNumRows > 0
4401 
4402  this->indicesAreLocal_ = true;
4403  this->indicesAreGlobal_ = false;
4404  this->checkInternalState ();
4405 
4406  return std::make_pair (lclNumErrs, errStrm.str ());
4407  }
4408 
4409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4410  void
4412  makeColMap (Teuchos::Array<int>& remotePIDs)
4413  {
4415  using std::endl;
4416  const char tfecfFuncName[] = "makeColMap";
4417 
4418  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4419  std::unique_ptr<std::string> prefix;
4420  if (verbose_) {
4421  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4422  std::ostringstream os;
4423  os << *prefix << "Start" << endl;
4424  std::cerr << os.str();
4425  }
4426 
4427  // this->colMap_ should be null at this point, but we accept the
4428  // future possibility that it might not be (esp. if we decide
4429  // later to support graph structure changes after first
4430  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4431  // 2017) support).
4432  Teuchos::RCP<const map_type> colMap = this->colMap_;
4433  const bool sortEachProcsGids =
4434  this->sortGhostsAssociatedWithEachProcessor_;
4435 
4436  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4437  // per-process error code. If an error does occur on a process,
4438  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4439  // notice that error. This is the caller's responsibility. For
4440  // now, we only propagate (to all processes) and report the error
4441  // in debug mode. In the future, we need to add the local/global
4442  // error handling scheme used in BlockCrsMatrix to this class.
4443  if (debug_) {
4444  using Teuchos::outArg;
4445  using Teuchos::REDUCE_MIN;
4446  using Teuchos::reduceAll;
4447 
4448  std::ostringstream errStrm;
4449  const int lclErrCode =
4450  Details::makeColMap (colMap, remotePIDs,
4451  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4452  auto comm = this->getComm ();
4453  if (! comm.is_null ()) {
4454  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4455  int gblSuccess = 0; // output argument
4456  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4457  outArg (gblSuccess));
4458  if (gblSuccess != 1) {
4459  std::ostringstream os;
4460  Details::gathervPrint (os, errStrm.str (), *comm);
4461  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4462  (true, std::runtime_error, ": An error happened on at "
4463  "least one process in the CrsGraph's communicator. "
4464  "Here are all processes' error messages:" << std::endl
4465  << os.str ());
4466  }
4467  }
4468  }
4469  else {
4470  (void) Details::makeColMap (colMap, remotePIDs,
4471  getDomainMap (), *this, sortEachProcsGids, nullptr);
4472  }
4473  // See above. We want to admit the possibility of makeColMap
4474  // actually revising an existing column Map, even though that
4475  // doesn't currently (as of 10 May 2017) happen.
4476  this->colMap_ = colMap;
4477 
4478  checkInternalState ();
4479  if (verbose_) {
4480  std::ostringstream os;
4481  os << *prefix << "Done" << endl;
4482  std::cerr << os.str();
4483  }
4484  }
4485 
4486 
4487  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4488  void
4490  sortAndMergeAllIndices (const bool sorted, const bool merged)
4491  {
4492  using std::endl;
4493  using LO = LocalOrdinal;
4494  using host_execution_space =
4495  typename Kokkos::View<LO*, device_type>::HostMirror::
4496  execution_space;
4497  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4498  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4499  Details::ProfilingRegion regionSortAndMerge
4500  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4501 
4502  std::unique_ptr<std::string> prefix;
4503  if (verbose_) {
4504  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4505  std::ostringstream os;
4506  os << *prefix << "Start: "
4507  << "sorted=" << (sorted ? "true" : "false")
4508  << ", merged=" << (merged ? "true" : "false") << endl;
4509  std::cerr << os.str();
4510  }
4511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4512  (this->isGloballyIndexed(), std::logic_error,
4513  "This method may only be called after makeIndicesLocal." );
4514  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4515  (! merged && this->isStorageOptimized(), std::logic_error,
4516  "The graph is already storage optimized, so we shouldn't be "
4517  "merging any indices. "
4518  "Please report this bug to the Tpetra developers.");
4519 
4520  if (! sorted || ! merged) {
4521  const LO lclNumRows(this->getLocalNumRows());
4522  auto range = range_type(0, lclNumRows);
4523 
4524  if (verbose_) {
4525  size_t totalNumDups = 0;
4526  //Sync and mark-modified the local indices before disabling WDV tracking
4527  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4529  Kokkos::parallel_reduce(range,
4530  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4531  {
4532  const RowInfo rowInfo = this->getRowInfo(lclRow);
4533  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4534  },
4535  totalNumDups);
4537  std::ostringstream os;
4538  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4539  std::cerr << os.str();
4540  }
4541  else {
4542  //Sync and mark-modified the local indices before disabling WDV tracking
4543  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4545  Kokkos::parallel_for(range,
4546  [this, sorted, merged] (const LO lclRow)
4547  {
4548  const RowInfo rowInfo = this->getRowInfo(lclRow);
4549  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4550  });
4552  }
4553  this->indicesAreSorted_ = true; // we just sorted every row
4554  this->noRedundancies_ = true; // we just merged every row
4555  }
4556 
4557  if (verbose_) {
4558  std::ostringstream os;
4559  os << *prefix << "Done" << endl;
4560  std::cerr << os.str();
4561  }
4562  }
4563 
4564  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4565  void
4567  makeImportExport (Teuchos::Array<int>& remotePIDs,
4568  const bool useRemotePIDs)
4569  {
4570  using ::Tpetra::Details::ProfilingRegion;
4571  using Teuchos::ParameterList;
4572  using Teuchos::RCP;
4573  using Teuchos::rcp;
4574  const char tfecfFuncName[] = "makeImportExport: ";
4575  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4576 
4577  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4578  (! this->hasColMap (), std::logic_error,
4579  "This method may not be called unless the graph has a column Map.");
4580  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4581 
4582  // Don't do any checks to see if we need to create the Import, if
4583  // it exists already.
4584  //
4585  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4586  // change CrsGraph in the future to allow changing the column
4587  // Map after fillComplete. For now, the column Map is fixed
4588  // after the first fillComplete call.
4589  if (importer_.is_null ()) {
4590  // Create the Import instance if necessary.
4591  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4592  if (params.is_null () || ! params->isSublist ("Import")) {
4593  if (useRemotePIDs) {
4594  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4595  }
4596  else {
4597  importer_ = rcp (new import_type (domainMap_, colMap_));
4598  }
4599  }
4600  else {
4601  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4602  if (useRemotePIDs) {
4603  RCP<import_type> newImp =
4604  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4605  importSublist));
4606  importer_ = newImp;
4607  }
4608  else {
4609  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4610  }
4611  }
4612  }
4613  }
4614 
4615  // Don't do any checks to see if we need to create the Export, if
4616  // it exists already.
4617  if (exporter_.is_null ()) {
4618  // Create the Export instance if necessary.
4619  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4620  if (params.is_null () || ! params->isSublist ("Export")) {
4621  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4622  }
4623  else {
4624  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4625  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4626  }
4627  }
4628  }
4629  }
4630 
4631 
4632  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4633  std::string
4636  {
4637  std::ostringstream oss;
4638  oss << dist_object_type::description ();
4639  if (isFillComplete ()) {
4640  oss << "{status = fill complete"
4641  << ", global rows = " << getGlobalNumRows()
4642  << ", global cols = " << getGlobalNumCols()
4643  << ", global num entries = " << getGlobalNumEntries()
4644  << "}";
4645  }
4646  else {
4647  oss << "{status = fill not complete"
4648  << ", global rows = " << getGlobalNumRows()
4649  << "}";
4650  }
4651  return oss.str();
4652  }
4653 
4654 
4655  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4656  void
4658  describe (Teuchos::FancyOStream &out,
4659  const Teuchos::EVerbosityLevel verbLevel) const
4660  {
4661  using Teuchos::ArrayView;
4662  using Teuchos::Comm;
4663  using Teuchos::RCP;
4664  using Teuchos::VERB_DEFAULT;
4665  using Teuchos::VERB_NONE;
4666  using Teuchos::VERB_LOW;
4667  using Teuchos::VERB_MEDIUM;
4668  using Teuchos::VERB_HIGH;
4669  using Teuchos::VERB_EXTREME;
4670  using std::endl;
4671  using std::setw;
4672 
4673  Teuchos::EVerbosityLevel vl = verbLevel;
4674  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4675  RCP<const Comm<int> > comm = this->getComm();
4676  const int myImageID = comm->getRank(),
4677  numImages = comm->getSize();
4678  size_t width = 1;
4679  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4680  ++width;
4681  }
4682  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4683  Teuchos::OSTab tab (out);
4684  // none: print nothing
4685  // low: print O(1) info from node 0
4686  // medium: print O(P) info, num entries per node
4687  // high: print O(N) info, num entries per row
4688  // extreme: print O(NNZ) info: print graph indices
4689  //
4690  // for medium and higher, print constituent objects at specified verbLevel
4691  if (vl != VERB_NONE) {
4692  if (myImageID == 0) out << this->description() << std::endl;
4693  // O(1) globals, minus what was already printed by description()
4694  if (isFillComplete() && myImageID == 0) {
4695  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4696  }
4697  // constituent objects
4698  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4699  if (myImageID == 0) out << "\nRow map: " << std::endl;
4700  rowMap_->describe(out,vl);
4701  if (colMap_ != Teuchos::null) {
4702  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4703  colMap_->describe(out,vl);
4704  }
4705  if (domainMap_ != Teuchos::null) {
4706  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4707  domainMap_->describe(out,vl);
4708  }
4709  if (rangeMap_ != Teuchos::null) {
4710  if (myImageID == 0) out << "\nRange map: " << std::endl;
4711  rangeMap_->describe(out,vl);
4712  }
4713  }
4714  // O(P) data
4715  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4716  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4717  if (myImageID == imageCtr) {
4718  out << "Node ID = " << imageCtr << std::endl
4719  << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4720  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4721  if (! indicesAreAllocated ()) {
4722  out << "Indices are not allocated." << std::endl;
4723  }
4724  }
4725  comm->barrier();
4726  comm->barrier();
4727  comm->barrier();
4728  }
4729  }
4730  // O(N) and O(NNZ) data
4731  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4732  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4733  if (myImageID == imageCtr) {
4734  out << std::setw(width) << "Node ID"
4735  << std::setw(width) << "Global Row"
4736  << std::setw(width) << "Num Entries";
4737  if (vl == VERB_EXTREME) {
4738  out << " Entries";
4739  }
4740  out << std::endl;
4741  const LocalOrdinal lclNumRows =
4742  static_cast<LocalOrdinal> (this->getLocalNumRows ());
4743  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4744  const RowInfo rowinfo = this->getRowInfo (r);
4745  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4746  out << std::setw(width) << myImageID
4747  << std::setw(width) << gid
4748  << std::setw(width) << rowinfo.numEntries;
4749  if (vl == VERB_EXTREME) {
4750  out << " ";
4751  if (isGloballyIndexed()) {
4752  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4753  for (size_t j=0; j < rowinfo.numEntries; ++j){
4754  GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4755  out << colgid << " ";
4756  }
4757  }
4758  else if (isLocallyIndexed()) {
4759  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4760  for (size_t j=0; j < rowinfo.numEntries; ++j) {
4761  LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4762  out << colMap_->getGlobalElement(collid) << " ";
4763  }
4764  }
4765  }
4766  out << std::endl;
4767  }
4768  }
4769  comm->barrier();
4770  comm->barrier();
4771  comm->barrier();
4772  }
4773  }
4774  }
4775  }
4776 
4777 
4778  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4779  bool
4781  checkSizes (const SrcDistObject& /* source */)
4782  {
4783  // It's not clear what kind of compatibility checks on sizes can
4784  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4785  // compatibility.
4786  return true;
4787  }
4788 
4789  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4790  void
4793  (const SrcDistObject& source,
4794  const size_t numSameIDs,
4795  const Kokkos::DualView<const local_ordinal_type*,
4796  buffer_device_type>& permuteToLIDs,
4797  const Kokkos::DualView<const local_ordinal_type*,
4798  buffer_device_type>& permuteFromLIDs,
4799  const CombineMode /*CM*/)
4800  {
4801  using std::endl;
4802  using LO = local_ordinal_type;
4803  using GO = global_ordinal_type;
4804  using this_CRS_type = CrsGraph<LO, GO, node_type>;
4805  const char tfecfFuncName[] = "copyAndPermute: ";
4806  const bool verbose = verbose_;
4807 
4808  std::unique_ptr<std::string> prefix;
4809  if (verbose) {
4810  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4811  std::ostringstream os;
4812  os << *prefix << endl;
4813  std::cerr << os.str ();
4814  }
4815 
4816  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4817  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4818  std::runtime_error, "permuteToLIDs.extent(0) = "
4819  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4820  << permuteFromLIDs.extent (0) << ".");
4821 
4822  // We know from checkSizes that the source object is a
4823  // row_graph_type, so we don't need to check again.
4824  const row_graph_type& srcRowGraph =
4825  dynamic_cast<const row_graph_type&> (source);
4826 
4827  if (verbose) {
4828  std::ostringstream os;
4829  os << *prefix << "Compute padding" << endl;
4830  std::cerr << os.str ();
4831  }
4832  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4833  permuteToLIDs, permuteFromLIDs, verbose);
4834  applyCrsPadding(*padding, verbose);
4835 
4836  // If the source object is actually a CrsGraph, we can use view
4837  // mode instead of copy mode to access the entries in each row,
4838  // if the graph is not fill complete.
4839  const this_CRS_type* srcCrsGraph =
4840  dynamic_cast<const this_CRS_type*> (&source);
4841 
4842  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4843  const map_type& tgtRowMap = *(getRowMap());
4844  const bool src_filled = srcRowGraph.isFillComplete();
4845  nonconst_global_inds_host_view_type row_copy;
4846  LO myid = 0;
4847 
4848  //
4849  // "Copy" part of "copy and permute."
4850  //
4851  if (src_filled || srcCrsGraph == nullptr) {
4852  if (verbose) {
4853  std::ostringstream os;
4854  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4855  std::cerr << os.str ();
4856  }
4857  // If the source graph is fill complete, we can't use view mode,
4858  // because the data might be stored in a different format not
4859  // compatible with the expectations of view mode. Also, if the
4860  // source graph is not a CrsGraph, we can't use view mode,
4861  // because RowGraph only provides copy mode access to the data.
4862  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4863  const GO gid = srcRowMap.getGlobalElement (myid);
4864  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4865  Kokkos::resize(row_copy,row_length);
4866  size_t check_row_length = 0;
4867  srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4868  this->insertGlobalIndices (gid, row_length, row_copy.data());
4869  }
4870  } else {
4871  if (verbose) {
4872  std::ostringstream os;
4873  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4874  std::cerr << os.str ();
4875  }
4876  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4877  const GO gid = srcRowMap.getGlobalElement (myid);
4878  global_inds_host_view_type row;
4879  srcCrsGraph->getGlobalRowView (gid, row);
4880  this->insertGlobalIndices (gid, row.extent(0), row.data());
4881  }
4882  }
4883 
4884  //
4885  // "Permute" part of "copy and permute."
4886  //
4887  auto permuteToLIDs_h = permuteToLIDs.view_host ();
4888  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4889 
4890  if (src_filled || srcCrsGraph == nullptr) {
4891  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4892  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4893  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4894  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4895  Kokkos::resize(row_copy,row_length);
4896  size_t check_row_length = 0;
4897  srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4898  this->insertGlobalIndices (mygid, row_length, row_copy.data());
4899  }
4900  } else {
4901  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4902  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4903  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4904  global_inds_host_view_type row;
4905  srcCrsGraph->getGlobalRowView (srcgid, row);
4906  this->insertGlobalIndices (mygid, row.extent(0), row.data());
4907  }
4908  }
4909 
4910  if (verbose) {
4911  std::ostringstream os;
4912  os << *prefix << "Done" << endl;
4913  std::cerr << os.str ();
4914  }
4915  }
4916 
4917  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4918  void
4920  applyCrsPadding(const padding_type& padding,
4921  const bool verbose)
4922  {
4924  using Details::padCrsArrays;
4925  using std::endl;
4926  using LO = local_ordinal_type;
4927  using row_ptrs_type =
4928  typename local_graph_device_type::row_map_type::non_const_type;
4929  using range_policy =
4930  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4931  const char tfecfFuncName[] = "applyCrsPadding";
4932  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4933 
4934  std::unique_ptr<std::string> prefix;
4935  if (verbose) {
4936  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4937  std::ostringstream os;
4938  os << *prefix << "padding: ";
4939  padding.print(os);
4940  os << endl;
4941  std::cerr << os.str();
4942  }
4943  const int myRank = ! verbose ? -1 : [&] () {
4944  auto map = this->getMap();
4945  if (map.is_null()) {
4946  return -1;
4947  }
4948  auto comm = map->getComm();
4949  if (comm.is_null()) {
4950  return -1;
4951  }
4952  return comm->getRank();
4953  } ();
4954 
4955  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4956  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4957  // size needs to increase. That should be the job of
4958  // padCrsArrays.
4959 
4960  // Assume global indexing we don't have any indices yet
4961  if (! indicesAreAllocated()) {
4962  if (verbose) {
4963  std::ostringstream os;
4964  os << *prefix << "Call allocateIndices" << endl;
4965  std::cerr << os.str();
4966  }
4967  allocateIndices(GlobalIndices, verbose);
4968  }
4969  TEUCHOS_ASSERT( indicesAreAllocated() );
4970 
4971  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4972  // would use it directly.
4973 
4974  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4975  if (verbose) {
4976  std::ostringstream os;
4977  os << *prefix << "Allocate row_ptrs_beg: "
4978  << rowPtrsUnpacked_dev.extent(0) << endl;
4979  std::cerr << os.str();
4980  }
4981  using Kokkos::view_alloc;
4982  using Kokkos::WithoutInitializing;
4983  row_ptrs_type row_ptrs_beg(
4984  view_alloc("row_ptrs_beg", WithoutInitializing),
4985  rowPtrsUnpacked_dev.extent(0));
4986  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4987  Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev);
4988 
4989  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4990  size_t(row_ptrs_beg.extent(0) - 1);
4991  if (verbose) {
4992  std::ostringstream os;
4993  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4994  std::cerr << os.str();
4995  }
4996  row_ptrs_type row_ptrs_end(
4997  view_alloc("row_ptrs_end", WithoutInitializing), N);
4998  row_ptrs_type num_row_entries;
4999 
5000  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5001 
5002  execution_space().fence(); // we need above deep_copy to be done
5003 
5004  if (refill_num_row_entries) { // Case 1: Unpacked storage
5005  // We can't assume correct *this capture until C++17, and it's
5006  // likely more efficient just to capture what we need anyway.
5007  num_row_entries =
5008  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
5009  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
5010  Kokkos::parallel_for
5011  ("Fill end row pointers", range_policy(0, N),
5012  KOKKOS_LAMBDA (const size_t i) {
5013  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5014  });
5015  }
5016  else {
5017  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5018  // storage, we don't need row_ptr_end to be separate allocation;
5019  // could just have it alias row_ptr_beg+1.
5020  Kokkos::parallel_for
5021  ("Fill end row pointers", range_policy(0, N),
5022  KOKKOS_LAMBDA (const size_t i) {
5023  row_ptrs_end(i) = row_ptrs_beg(i+1);
5024  });
5025  }
5026 
5027  if (isGloballyIndexed()) {
5028  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5029  padding, myRank, verbose);
5030  }
5031  else {
5032  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5033  padding, myRank, verbose);
5034  }
5035 
5036  if (refill_num_row_entries) {
5037  Kokkos::parallel_for
5038  ("Fill num entries", range_policy(0, N),
5039  KOKKOS_LAMBDA (const size_t i) {
5040  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5041  });
5042  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5043  }
5044  if (verbose) {
5045  std::ostringstream os;
5046  os << *prefix << "Reassign k_rowPtrs_; old size: "
5047  << rowPtrsUnpacked_dev.extent(0) << ", new size: "
5048  << row_ptrs_beg.extent(0) << endl;
5049  std::cerr << os.str();
5050  TEUCHOS_ASSERT( rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0) );
5051  }
5052 
5053  setRowPtrsUnpacked(row_ptrs_beg);
5054  }
5055 
5056  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5057  std::unique_ptr<
5058  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5059  >
5060  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5061  computeCrsPadding(
5062  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5063  const size_t numSameIDs,
5064  const Kokkos::DualView<const local_ordinal_type*,
5065  buffer_device_type>& permuteToLIDs,
5066  const Kokkos::DualView<const local_ordinal_type*,
5067  buffer_device_type>& permuteFromLIDs,
5068  const bool verbose) const
5069  {
5070  using LO = local_ordinal_type;
5071  using std::endl;
5072 
5073  std::unique_ptr<std::string> prefix;
5074  if (verbose) {
5075  prefix = this->createPrefix("CrsGraph",
5076  "computeCrsPadding(same & permute)");
5077  std::ostringstream os;
5078  os << *prefix << "{numSameIDs: " << numSameIDs
5079  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5080  << endl;
5081  std::cerr << os.str();
5082  }
5083 
5084  const int myRank = [&] () {
5085  auto comm = rowMap_.is_null() ? Teuchos::null :
5086  rowMap_->getComm();
5087  return comm.is_null() ? -1 : comm->getRank();
5088  } ();
5089  std::unique_ptr<padding_type> padding(
5090  new padding_type(myRank, numSameIDs,
5091  permuteFromLIDs.extent(0)));
5092 
5093  computeCrsPaddingForSameIDs(*padding, source,
5094  static_cast<LO>(numSameIDs));
5095  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5096  permuteFromLIDs);
5097  return padding;
5098  }
5099 
5100  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5101  void
5102  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5103  computeCrsPaddingForSameIDs(
5104  padding_type& padding,
5105  const RowGraph<local_ordinal_type, global_ordinal_type,
5106  node_type>& source,
5107  const local_ordinal_type numSameIDs) const
5108  {
5109  using LO = local_ordinal_type;
5110  using GO = global_ordinal_type;
5111  using Details::Impl::getRowGraphGlobalRow;
5112  using std::endl;
5113  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5114 
5115  std::unique_ptr<std::string> prefix;
5116  const bool verbose = verbose_;
5117  if (verbose) {
5118  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5119  std::ostringstream os;
5120  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5121  std::cerr << os.str();
5122  }
5123 
5124  if (numSameIDs == 0) {
5125  return;
5126  }
5127 
5128  const map_type& srcRowMap = *(source.getRowMap());
5129  const map_type& tgtRowMap = *rowMap_;
5130  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5131  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5132  const bool src_is_unique =
5133  srcCrs == nullptr ? false : srcCrs->isMerged();
5134  const bool tgt_is_unique = this->isMerged();
5135 
5136  std::vector<GO> srcGblColIndsScratch;
5137  std::vector<GO> tgtGblColIndsScratch;
5138 
5139  execute_sync_host_uvm_access(); // protect host UVM access
5140  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5141  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5142  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5143  auto srcGblColInds = getRowGraphGlobalRow(
5144  srcGblColIndsScratch, source, srcGblRowInd);
5145  auto tgtGblColInds = getRowGraphGlobalRow(
5146  tgtGblColIndsScratch, *this, tgtGblRowInd);
5147  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5148  tgtGblColInds.size(), tgt_is_unique,
5149  srcGblColInds.getRawPtr(),
5150  srcGblColInds.size(), src_is_unique);
5151  }
5152  if (verbose) {
5153  std::ostringstream os;
5154  os << *prefix << "Done" << endl;
5155  std::cerr << os.str();
5156  }
5157  }
5158 
5159  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5160  void
5161  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5162  computeCrsPaddingForPermutedIDs(
5163  padding_type& padding,
5164  const RowGraph<local_ordinal_type, global_ordinal_type,
5165  node_type>& source,
5166  const Kokkos::DualView<const local_ordinal_type*,
5167  buffer_device_type>& permuteToLIDs,
5168  const Kokkos::DualView<const local_ordinal_type*,
5169  buffer_device_type>& permuteFromLIDs) const
5170  {
5171  using LO = local_ordinal_type;
5172  using GO = global_ordinal_type;
5173  using Details::Impl::getRowGraphGlobalRow;
5174  using std::endl;
5175  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5176 
5177  std::unique_ptr<std::string> prefix;
5178  const bool verbose = verbose_;
5179  if (verbose) {
5180  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5181  std::ostringstream os;
5182  os << *prefix << "permuteToLIDs.extent(0): "
5183  << permuteToLIDs.extent(0)
5184  << ", permuteFromLIDs.extent(0): "
5185  << permuteFromLIDs.extent(0) << endl;
5186  std::cerr << os.str();
5187  }
5188 
5189  if (permuteToLIDs.extent(0) == 0) {
5190  return;
5191  }
5192 
5193  const map_type& srcRowMap = *(source.getRowMap());
5194  const map_type& tgtRowMap = *rowMap_;
5195  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5196  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5197  const bool src_is_unique =
5198  srcCrs == nullptr ? false : srcCrs->isMerged();
5199  const bool tgt_is_unique = this->isMerged();
5200 
5201  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5202  auto permuteToLIDs_h = permuteToLIDs.view_host();
5203  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5204  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5205 
5206  std::vector<GO> srcGblColIndsScratch;
5207  std::vector<GO> tgtGblColIndsScratch;
5208  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5209 
5210  execute_sync_host_uvm_access(); // protect host UVM access
5211  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5212  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5213  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5214  auto srcGblColInds = getRowGraphGlobalRow(
5215  srcGblColIndsScratch, source, srcGblRowInd);
5216  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5217  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5218  auto tgtGblColInds = getRowGraphGlobalRow(
5219  tgtGblColIndsScratch, *this, tgtGblRowInd);
5220  padding.update_permute(whichPermute, tgtLclRowInd,
5221  tgtGblColInds.getRawPtr(),
5222  tgtGblColInds.size(), tgt_is_unique,
5223  srcGblColInds.getRawPtr(),
5224  srcGblColInds.size(), src_is_unique);
5225  }
5226 
5227  if (verbose) {
5228  std::ostringstream os;
5229  os << *prefix << "Done" << endl;
5230  std::cerr << os.str();
5231  }
5232  }
5233 
5234  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5235  std::unique_ptr<
5236  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5237  >
5238  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5239  computeCrsPaddingForImports(
5240  const Kokkos::DualView<const local_ordinal_type*,
5241  buffer_device_type>& importLIDs,
5242  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5243  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5244  const bool verbose) const
5245  {
5246  using Details::Impl::getRowGraphGlobalRow;
5247  using std::endl;
5248  using LO = local_ordinal_type;
5249  using GO = global_ordinal_type;
5250  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5251 
5252  std::unique_ptr<std::string> prefix;
5253  if (verbose) {
5254  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5255  std::ostringstream os;
5256  os << *prefix << "importLIDs.extent(0): "
5257  << importLIDs.extent(0)
5258  << ", imports.extent(0): "
5259  << imports.extent(0)
5260  << ", numPacketsPerLID.extent(0): "
5261  << numPacketsPerLID.extent(0) << endl;
5262  std::cerr << os.str();
5263  }
5264 
5265  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5266  const int myRank = [&] () {
5267  auto comm = rowMap_.is_null() ? Teuchos::null :
5268  rowMap_->getComm();
5269  return comm.is_null() ? -1 : comm->getRank();
5270  } ();
5271  std::unique_ptr<padding_type> padding(
5272  new padding_type(myRank, numImports));
5273 
5274  if (imports.need_sync_host()) {
5275  imports.sync_host();
5276  }
5277  auto imports_h = imports.view_host();
5278  if (numPacketsPerLID.need_sync_host ()) {
5279  numPacketsPerLID.sync_host();
5280  }
5281  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5282 
5283  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5284  auto importLIDs_h = importLIDs.view_host();
5285 
5286  const map_type& tgtRowMap = *rowMap_;
5287  // Always merge source column indices, since isMerged() is
5288  // per-process state, and we don't know its value on other
5289  // processes that sent us data.
5290  constexpr bool src_is_unique = false;
5291  const bool tgt_is_unique = isMerged();
5292 
5293  std::vector<GO> tgtGblColIndsScratch;
5294  size_t offset = 0;
5295  execute_sync_host_uvm_access(); // protect host UVM access
5296  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5297  // CrsGraph packs just global column indices, while CrsMatrix
5298  // packs bytes (first the number of entries in the row, then the
5299  // global column indices, then other stuff like the matrix
5300  // values in that row).
5301  const LO origSrcNumEnt =
5302  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5303  GO* const srcGblColInds = imports_h.data() + offset;
5304 
5305  const LO tgtLclRowInd = importLIDs_h[whichImport];
5306  const GO tgtGblRowInd =
5307  tgtRowMap.getGlobalElement(tgtLclRowInd);
5308  auto tgtGblColInds = getRowGraphGlobalRow(
5309  tgtGblColIndsScratch, *this, tgtGblRowInd);
5310  const size_t origTgtNumEnt(tgtGblColInds.size());
5311 
5312  padding->update_import(whichImport, tgtLclRowInd,
5313  tgtGblColInds.getRawPtr(),
5314  origTgtNumEnt, tgt_is_unique,
5315  srcGblColInds,
5316  origSrcNumEnt, src_is_unique);
5317  offset += origSrcNumEnt;
5318  }
5319 
5320  if (verbose) {
5321  std::ostringstream os;
5322  os << *prefix << "Done" << endl;
5323  std::cerr << os.str();
5324  }
5325  return padding;
5326  }
5327 
5328  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5329  std::unique_ptr<
5330  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5331  >
5332  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5333  computePaddingForCrsMatrixUnpack(
5334  const Kokkos::DualView<const local_ordinal_type*,
5335  buffer_device_type>& importLIDs,
5336  Kokkos::DualView<char*, buffer_device_type> imports,
5337  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5338  const bool verbose) const
5339  {
5340  using Details::Impl::getRowGraphGlobalRow;
5341  using Details::PackTraits;
5342  using std::endl;
5343  using LO = local_ordinal_type;
5344  using GO = global_ordinal_type;
5345  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5346 
5347  std::unique_ptr<std::string> prefix;
5348  if (verbose) {
5349  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5350  std::ostringstream os;
5351  os << *prefix << "importLIDs.extent(0): "
5352  << importLIDs.extent(0)
5353  << ", imports.extent(0): "
5354  << imports.extent(0)
5355  << ", numPacketsPerLID.extent(0): "
5356  << numPacketsPerLID.extent(0) << endl;
5357  std::cerr << os.str();
5358  }
5359  const bool extraVerbose =
5360  verbose && Details::Behavior::verbose("CrsPadding");
5361 
5362  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5363  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5364  const int myRank = [&] () {
5365  auto comm = rowMap_.is_null() ? Teuchos::null :
5366  rowMap_->getComm();
5367  return comm.is_null() ? -1 : comm->getRank();
5368  } ();
5369  std::unique_ptr<padding_type> padding(
5370  new padding_type(myRank, numImports));
5371 
5372  if (imports.need_sync_host()) {
5373  imports.sync_host();
5374  }
5375  auto imports_h = imports.view_host();
5376  if (numPacketsPerLID.need_sync_host ()) {
5377  numPacketsPerLID.sync_host();
5378  }
5379  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5380 
5381  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5382  auto importLIDs_h = importLIDs.view_host();
5383 
5384  const map_type& tgtRowMap = *rowMap_;
5385  // Always merge source column indices, since isMerged() is
5386  // per-process state, and we don't know its value on other
5387  // processes that sent us data.
5388  constexpr bool src_is_unique = false;
5389  const bool tgt_is_unique = isMerged();
5390 
5391  std::vector<GO> srcGblColIndsScratch;
5392  std::vector<GO> tgtGblColIndsScratch;
5393  size_t offset = 0;
5394  execute_sync_host_uvm_access(); // protect host UVM access
5395  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5396  // CrsGraph packs just global column indices, while CrsMatrix
5397  // packs bytes (first the number of entries in the row, then the
5398  // global column indices, then other stuff like the matrix
5399  // values in that row).
5400  const size_t numBytes = numPacketsPerLID_h[whichImport];
5401  if (extraVerbose) {
5402  std::ostringstream os;
5403  os << *prefix << "whichImport=" << whichImport
5404  << ", numImports=" << numImports
5405  << ", numBytes=" << numBytes << endl;
5406  std::cerr << os.str();
5407  }
5408  if (numBytes == 0) {
5409  continue; // special case: no entries to unpack for this row
5410  }
5411  LO origSrcNumEnt = 0;
5412  const size_t numEntBeg = offset;
5413  const size_t numEntLen =
5414  PackTraits<LO>::packValueCount(origSrcNumEnt);
5415  TEUCHOS_ASSERT( numBytes >= numEntLen );
5416  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5417  PackTraits<LO>::unpackValue(origSrcNumEnt,
5418  imports_h.data() + numEntBeg);
5419  if (extraVerbose) {
5420  std::ostringstream os;
5421  os << *prefix << "whichImport=" << whichImport
5422  << ", numImports=" << numImports
5423  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5424  std::cerr << os.str();
5425  }
5426  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5427  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5428  const size_t gidsBeg = numEntBeg + numEntLen;
5429  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5430  srcGblColIndsScratch.resize(origSrcNumEnt);
5431  }
5432  GO* const srcGblColInds = srcGblColIndsScratch.data();
5433  PackTraits<GO>::unpackArray(srcGblColInds,
5434  imports_h.data() + gidsBeg,
5435  origSrcNumEnt);
5436  const LO tgtLclRowInd = importLIDs_h[whichImport];
5437  const GO tgtGblRowInd =
5438  tgtRowMap.getGlobalElement(tgtLclRowInd);
5439  auto tgtGblColInds = getRowGraphGlobalRow(
5440  tgtGblColIndsScratch, *this, tgtGblRowInd);
5441  const size_t origNumTgtEnt(tgtGblColInds.size());
5442 
5443  if (extraVerbose) {
5444  std::ostringstream os;
5445  os << *prefix << "whichImport=" << whichImport
5446  << ", numImports=" << numImports
5447  << ": Call padding->update_import" << endl;
5448  std::cerr << os.str();
5449  }
5450  padding->update_import(whichImport, tgtLclRowInd,
5451  tgtGblColInds.getRawPtr(),
5452  origNumTgtEnt, tgt_is_unique,
5453  srcGblColInds,
5454  origSrcNumEnt, src_is_unique);
5455  offset += numBytes;
5456  }
5457 
5458  if (verbose) {
5459  std::ostringstream os;
5460  os << *prefix << "Done" << endl;
5461  std::cerr << os.str();
5462  }
5463  return padding;
5464  }
5465 
5466  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5467  void
5468  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5469  packAndPrepare
5470  (const SrcDistObject& source,
5471  const Kokkos::DualView<const local_ordinal_type*,
5472  buffer_device_type>& exportLIDs,
5473  Kokkos::DualView<packet_type*,
5474  buffer_device_type>& exports,
5475  Kokkos::DualView<size_t*,
5476  buffer_device_type> numPacketsPerLID,
5477  size_t& constantNumPackets)
5478  {
5480  using GO = global_ordinal_type;
5481  using std::endl;
5482  using crs_graph_type =
5483  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5484  const char tfecfFuncName[] = "packAndPrepare: ";
5485  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5486 
5487  const bool verbose = verbose_;
5488  std::unique_ptr<std::string> prefix;
5489  if (verbose) {
5490  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5491  std::ostringstream os;
5492  os << *prefix << "Start" << endl;
5493  std::cerr << os.str();
5494  }
5495 
5496  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5497  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5498  std::runtime_error,
5499  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5500  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5501  << ".");
5502  const row_graph_type* srcRowGraphPtr =
5503  dynamic_cast<const row_graph_type*> (&source);
5504  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5505  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5506  "or Import operation to a CrsGraph must be a RowGraph with the same "
5507  "template parameters.");
5508  // We don't check whether src_graph has had fillComplete called,
5509  // because it doesn't matter whether the *source* graph has been
5510  // fillComplete'd. The target graph can not be fillComplete'd yet.
5511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5512  (this->isFillComplete (), std::runtime_error,
5513  "The target graph of an Import or Export must not be fill complete.");
5514 
5515  const crs_graph_type* srcCrsGraphPtr =
5516  dynamic_cast<const crs_graph_type*> (&source);
5517 
5518  if (srcCrsGraphPtr == nullptr) {
5519  using Teuchos::ArrayView;
5520  using LO = local_ordinal_type;
5521 
5522  if (verbose) {
5523  std::ostringstream os;
5524  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5525  << endl;
5526  std::cerr << os.str();
5527  }
5528  // RowGraph::pack serves the "old" DistObject interface. It
5529  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5530  // entails deep-copying the exports buffer on output. RowGraph
5531  // is a convenience interface when not a CrsGraph, so we accept
5532  // the performance hit.
5533  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5534  auto exportLIDs_h = exportLIDs.view_host ();
5535  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5536  exportLIDs_h.extent (0));
5537  Teuchos::Array<GO> exports_a;
5538 
5539  numPacketsPerLID.clear_sync_state ();
5540  numPacketsPerLID.modify_host ();
5541  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5542  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5543  numPacketsPerLID_h.extent (0));
5544  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5545  constantNumPackets);
5546  const size_t newSize = static_cast<size_t> (exports_a.size ());
5547  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5548  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5549  exports = exports_dv_type ("exports", newSize);
5550  }
5551  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5552  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5553  exports.clear_sync_state ();
5554  exports.modify_host ();
5555  // DEEP_COPY REVIEW - NOT TESTED
5556  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5557  }
5558  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5559  else if (! getColMap ().is_null () &&
5560  (this->getRowPtrsPackedDevice().extent (0) != 0 ||
5561  getRowMap ()->getLocalNumElements () == 0)) {
5562  if (verbose) {
5563  std::ostringstream os;
5564  os << *prefix << "packCrsGraphNew path" << endl;
5565  std::cerr << os.str();
5566  }
5567  using export_pids_type =
5568  Kokkos::DualView<const int*, buffer_device_type>;
5569  export_pids_type exportPIDs; // not filling it; needed for syntax
5570  using LO = local_ordinal_type;
5571  using NT = node_type;
5573  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5574  exports, numPacketsPerLID,
5575  constantNumPackets, false);
5576  }
5577  else {
5578  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5579  constantNumPackets);
5580  }
5581 
5582  if (verbose) {
5583  std::ostringstream os;
5584  os << *prefix << "Done" << endl;
5585  std::cerr << os.str();
5586  }
5587  }
5588 
5589  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5590  void
5592  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5593  Teuchos::Array<GlobalOrdinal>& exports,
5594  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5595  size_t& constantNumPackets) const
5596  {
5597  auto col_map = this->getColMap();
5598  // packCrsGraph requires k_rowPtrsPacked to be set
5599  if( !col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5601  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5602  exportLIDs, constantNumPackets);
5603  }
5604  else {
5605  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5606  constantNumPackets);
5607  }
5608  }
5609 
5610  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5611  void
5613  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5614  Teuchos::Array<GlobalOrdinal>& exports,
5615  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5616  size_t& constantNumPackets) const
5617  {
5618  using std::endl;
5619  using LO = LocalOrdinal;
5620  using GO = GlobalOrdinal;
5621  using host_execution_space =
5622  typename Kokkos::View<size_t*, device_type>::
5623  HostMirror::execution_space;
5624  const char tfecfFuncName[] = "packFillActive: ";
5625  const bool verbose = verbose_;
5626 
5627  const auto numExportLIDs = exportLIDs.size ();
5628  std::unique_ptr<std::string> prefix;
5629  if (verbose) {
5630  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5631  std::ostringstream os;
5632  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5633  std::cerr << os.str();
5634  }
5635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5636  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5637  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5638  " = " << numPacketsPerLID.size () << ".");
5639 
5640  const map_type& rowMap = * (this->getRowMap ());
5641  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5642  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5643  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5644  "This graph claims to be locally indexed, but its column Map is nullptr. "
5645  "This should never happen. Please report this bug to the Tpetra "
5646  "developers.");
5647 
5648  // We may pack different amounts of data for different rows.
5649  constantNumPackets = 0;
5650 
5651  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5652  // it might be now, but we might as well be safe).
5653  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5654  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5655 
5656  // Count the total number of packets (column indices, in the case
5657  // of a CrsGraph) to pack. While doing so, set
5658  // numPacketsPerLID[i] to the number of entries owned by the
5659  // calling process in (local) row exportLIDs[i] of the graph, that
5660  // the caller wants us to send out.
5661  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5662  size_t totalNumPackets = 0;
5663  size_t errCount = 0;
5664  // lambdas turn what they capture const, so we can't
5665  // atomic_add(&errCount,1). Instead, we need a View to modify.
5666  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5667  host_device_type;
5668  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5669  constexpr size_t ONE = 1;
5670 
5671  execute_sync_host_uvm_access(); // protect host UVM access
5672  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5673  inputRange,
5674  [=] (const LO& i, size_t& curTotalNumPackets) {
5675  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5676  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5677  Kokkos::atomic_add (&errCountView(), ONE);
5678  numPacketsPerLID_raw[i] = 0;
5679  }
5680  else {
5681  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5682  numPacketsPerLID_raw[i] = numEnt;
5683  curTotalNumPackets += numEnt;
5684  }
5685  },
5686  totalNumPackets);
5687 
5688  if (verbose) {
5689  std::ostringstream os;
5690  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5691  std::cerr << os.str();
5692  }
5693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5694  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5695  "one or more errors! errCount = " << errCount
5696  << ", totalNumPackets = " << totalNumPackets << ".");
5697  errCount = 0;
5698 
5699  // Allocate space for all the column indices to pack.
5700  exports.resize (totalNumPackets);
5701 
5702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5703  (! this->supportsRowViews (), std::logic_error,
5704  "this->supportsRowViews() returns false; this should never happen. "
5705  "Please report this bug to the Tpetra developers.");
5706 
5707  // Loop again over the rows to export, and pack rows of indices
5708  // into the output buffer.
5709 
5710  if (verbose) {
5711  std::ostringstream os;
5712  os << *prefix << "Pack into exports" << endl;
5713  std::cerr << os.str();
5714  }
5715 
5716  // Teuchos::ArrayView may not be thread safe, or may not be
5717  // efficiently thread safe. Better to use the raw pointer.
5718  GO* const exports_raw = exports.getRawPtr ();
5719  errCount = 0;
5720  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5721  inputRange, [=, &prefix]
5722  (const LO i, size_t& exportsOffset, const bool final) {
5723  const size_t curOffset = exportsOffset;
5724  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5725  const RowInfo rowInfo =
5726  this->getRowInfoFromGlobalRowIndex (gblRow);
5727 
5728  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5729  if (rowInfo.localRow == TDO::invalid ()) {
5730  if (verbose) {
5731  std::ostringstream os;
5732  os << *prefix << ": INVALID rowInfo: i=" << i
5733  << ", lclRow=" << exportLIDs_raw[i] << endl;
5734  std::cerr << os.str();
5735  }
5736  Kokkos::atomic_add (&errCountView(), ONE);
5737  }
5738  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5739  if (verbose) {
5740  std::ostringstream os;
5741  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5742  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5743  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5744  << ") > totalNumPackets (= " << totalNumPackets << ")."
5745  << endl;
5746  std::cerr << os.str();
5747  }
5748  Kokkos::atomic_add (&errCountView(), ONE);
5749  }
5750  else {
5751  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5752  if (this->isLocallyIndexed ()) {
5753  auto lclColInds = getLocalIndsViewHost (rowInfo);
5754  if (final) {
5755  for (LO k = 0; k < numEnt; ++k) {
5756  const LO lclColInd = lclColInds(k);
5757  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5758  // Pack it, even if it's wrong. Let the receiving
5759  // process deal with it. Otherwise, we'll miss out
5760  // on any correct data.
5761  exports_raw[curOffset + k] = gblColInd;
5762  } // for each entry in the row
5763  } // final pass?
5764  exportsOffset = curOffset + numEnt;
5765  }
5766  else if (this->isGloballyIndexed ()) {
5767  auto gblColInds = getGlobalIndsViewHost (rowInfo);
5768  if (final) {
5769  for (LO k = 0; k < numEnt; ++k) {
5770  const GO gblColInd = gblColInds(k);
5771  // Pack it, even if it's wrong. Let the receiving
5772  // process deal with it. Otherwise, we'll miss out
5773  // on any correct data.
5774  exports_raw[curOffset + k] = gblColInd;
5775  } // for each entry in the row
5776  } // final pass?
5777  exportsOffset = curOffset + numEnt;
5778  }
5779  // If neither globally nor locally indexed, then the graph
5780  // has no entries in this row (or indeed, in any row on this
5781  // process) to pack.
5782  }
5783  });
5784 
5785  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5786  (errCount != 0, std::logic_error, "Packing encountered "
5787  "one or more errors! errCount = " << errCount
5788  << ", totalNumPackets = " << totalNumPackets << ".");
5789 
5790  if (verbose) {
5791  std::ostringstream os;
5792  os << *prefix << "Done" << endl;
5793  std::cerr << os.str();
5794  }
5795  }
5796 
5797  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5798  void
5799  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5800  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5801  buffer_device_type>& exportLIDs,
5802  Kokkos::DualView<packet_type*,
5803  buffer_device_type>& exports,
5804  Kokkos::DualView<size_t*,
5805  buffer_device_type> numPacketsPerLID,
5806  size_t& constantNumPackets) const
5807  {
5808  using std::endl;
5809  using LO = local_ordinal_type;
5810  using GO = global_ordinal_type;
5811  using host_execution_space = typename Kokkos::View<size_t*,
5812  device_type>::HostMirror::execution_space;
5813  using host_device_type =
5814  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5815  using exports_dv_type =
5816  Kokkos::DualView<packet_type*, buffer_device_type>;
5817  const char tfecfFuncName[] = "packFillActiveNew: ";
5818  const bool verbose = verbose_;
5819 
5820  const auto numExportLIDs = exportLIDs.extent (0);
5821  std::unique_ptr<std::string> prefix;
5822  if (verbose) {
5823  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5824  std::ostringstream os;
5825  os << *prefix << "numExportLIDs: " << numExportLIDs
5826  << ", numPacketsPerLID.extent(0): "
5827  << numPacketsPerLID.extent(0) << endl;
5828  std::cerr << os.str();
5829  }
5830  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5831  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5832  "exportLIDs.extent(0) = " << numExportLIDs
5833  << " != numPacketsPerLID.extent(0) = "
5834  << numPacketsPerLID.extent (0) << ".");
5835  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5836  auto exportLIDs_h = exportLIDs.view_host ();
5837 
5838  const map_type& rowMap = * (this->getRowMap ());
5839  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5840  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5841  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5842  "This graph claims to be locally indexed, but its column Map is nullptr. "
5843  "This should never happen. Please report this bug to the Tpetra "
5844  "developers.");
5845 
5846  // We may pack different amounts of data for different rows.
5847  constantNumPackets = 0;
5848 
5849  numPacketsPerLID.clear_sync_state ();
5850  numPacketsPerLID.modify_host ();
5851  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5852 
5853  // Count the total number of packets (column indices, in the case
5854  // of a CrsGraph) to pack. While doing so, set
5855  // numPacketsPerLID[i] to the number of entries owned by the
5856  // calling process in (local) row exportLIDs[i] of the graph, that
5857  // the caller wants us to send out.
5858  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5859  range_type inputRange (0, numExportLIDs);
5860  size_t totalNumPackets = 0;
5861  size_t errCount = 0;
5862  // lambdas turn what they capture const, so we can't
5863  // atomic_add(&errCount,1). Instead, we need a View to modify.
5864  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5865  constexpr size_t ONE = 1;
5866 
5867  if (verbose) {
5868  std::ostringstream os;
5869  os << *prefix << "Compute totalNumPackets" << endl;
5870  std::cerr << os.str ();
5871  }
5872 
5873  execute_sync_host_uvm_access(); // protect host UVM access
5874  totalNumPackets = 0;
5875  for (size_t i=0; i<numExportLIDs; ++i) {
5876  const LO lclRow = exportLIDs_h[i];
5877  const GO gblRow = rowMap.getGlobalElement (lclRow);
5878  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5879  if (verbose) {
5880  std::ostringstream os;
5881  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5882  << " not in row Map on this process" << endl;
5883  std::cerr << os.str();
5884  }
5885  Kokkos::atomic_add (&errCountView(), ONE);
5886  numPacketsPerLID_h(i) = 0;
5887  }
5888  else {
5889  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5890  numPacketsPerLID_h(i) = numEnt;
5891  totalNumPackets += numEnt;
5892  }
5893  }
5894 
5895  if (verbose) {
5896  std::ostringstream os;
5897  os << *prefix << "totalNumPackets: " << totalNumPackets
5898  << ", errCount: " << errCount << endl;
5899  std::cerr << os.str ();
5900  }
5901  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5902  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5903  "one or more errors! totalNumPackets: " << totalNumPackets
5904  << ", errCount: " << errCount << ".");
5905 
5906  // Allocate space for all the column indices to pack.
5907  if (size_t(exports.extent (0)) < totalNumPackets) {
5908  // FIXME (mfh 09 Apr 2019) Create without initializing.
5909  exports = exports_dv_type ("exports", totalNumPackets);
5910  }
5911 
5912  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5913  (! this->supportsRowViews (), std::logic_error,
5914  "this->supportsRowViews() returns false; this should never happen. "
5915  "Please report this bug to the Tpetra developers.");
5916 
5917  // Loop again over the rows to export, and pack rows of indices
5918  // into the output buffer.
5919 
5920  if (verbose) {
5921  std::ostringstream os;
5922  os << *prefix << "Pack into exports buffer" << endl;
5923  std::cerr << os.str();
5924  }
5925 
5926  exports.clear_sync_state ();
5927  exports.modify_host ();
5928  auto exports_h = exports.view_host ();
5929 
5930  errCount = 0;
5931 
5932  // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5933  // (if locally indexed) or gblInds_wdv (if globally indexed).
5934  if(isLocallyIndexed())
5935  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5936  else if(isGloballyIndexed())
5937  gblInds_wdv.getHostView(Access::ReadOnly);
5938 
5940  Kokkos::parallel_scan
5941  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5942  inputRange, [=, &prefix]
5943  (const LO i, size_t& exportsOffset, const bool final) {
5944  const size_t curOffset = exportsOffset;
5945  const LO lclRow = exportLIDs_h(i);
5946  const GO gblRow = rowMap.getGlobalElement (lclRow);
5947  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5948  if (verbose) {
5949  std::ostringstream os;
5950  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5951  << " not in row Map on this process" << endl;
5952  std::cerr << os.str();
5953  }
5954  Kokkos::atomic_add (&errCountView(), ONE);
5955  return;
5956  }
5957 
5958  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5959  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5960  if (verbose) {
5961  std::ostringstream os;
5962  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5963  << ", gblRow=" << gblRow << ": invalid rowInfo"
5964  << endl;
5965  std::cerr << os.str();
5966  }
5967  Kokkos::atomic_add (&errCountView(), ONE);
5968  return;
5969  }
5970 
5971  if (curOffset + rowInfo.numEntries > totalNumPackets) {
5972  if (verbose) {
5973  std::ostringstream os;
5974  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5975  << ", gblRow=" << gblRow << ", curOffset (= "
5976  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5977  << ") > totalNumPackets (= " << totalNumPackets
5978  << ")." << endl;
5979  std::cerr << os.str();
5980  }
5981  Kokkos::atomic_add (&errCountView(), ONE);
5982  return;
5983  }
5984 
5985  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5986  if (this->isLocallyIndexed ()) {
5987  auto lclColInds = getLocalIndsViewHost(rowInfo);
5988  if (final) {
5989  for (LO k = 0; k < numEnt; ++k) {
5990  const LO lclColInd = lclColInds(k);
5991  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5992  // Pack it, even if it's wrong. Let the receiving
5993  // process deal with it. Otherwise, we'll miss out
5994  // on any correct data.
5995  exports_h(curOffset + k) = gblColInd;
5996  } // for each entry in the row
5997  } // final pass?
5998  exportsOffset = curOffset + numEnt;
5999  }
6000  else if (this->isGloballyIndexed ()) {
6001  auto gblColInds = getGlobalIndsViewHost(rowInfo);
6002  if (final) {
6003  for (LO k = 0; k < numEnt; ++k) {
6004  const GO gblColInd = gblColInds(k);
6005  // Pack it, even if it's wrong. Let the receiving
6006  // process deal with it. Otherwise, we'll miss out
6007  // on any correct data.
6008  exports_h(curOffset + k) = gblColInd;
6009  } // for each entry in the row
6010  } // final pass?
6011  exportsOffset = curOffset + numEnt;
6012  }
6013  // If neither globally nor locally indexed, then the graph
6014  // has no entries in this row (or indeed, in any row on this
6015  // process) to pack.
6016  });
6018 
6019  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6020  // (errCount != 0, std::logic_error, "Packing encountered "
6021  // "one or more errors! errCount = " << errCount
6022  // << ", totalNumPackets = " << totalNumPackets << ".");
6023 
6024  if (verbose) {
6025  std::ostringstream os;
6026  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6027  std::cerr << os.str();
6028  }
6029  }
6030 
6031  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6032  void
6035  (const Kokkos::DualView<const local_ordinal_type*,
6036  buffer_device_type>& importLIDs,
6037  Kokkos::DualView<packet_type*,
6038  buffer_device_type> imports,
6039  Kokkos::DualView<size_t*,
6040  buffer_device_type> numPacketsPerLID,
6041  const size_t /* constantNumPackets */,
6042  const CombineMode /* combineMode */ )
6043  {
6045  using std::endl;
6046  using LO = local_ordinal_type;
6047  using GO = global_ordinal_type;
6048  const char tfecfFuncName[] = "unpackAndCombine";
6049 
6050  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6051  const bool verbose = verbose_;
6052 
6053  std::unique_ptr<std::string> prefix;
6054  if (verbose) {
6055  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6056  std::ostringstream os;
6057  os << *prefix << "Start" << endl;
6058  std::cerr << os.str ();
6059  }
6060  {
6061  auto padding = computeCrsPaddingForImports(
6062  importLIDs, imports, numPacketsPerLID, verbose);
6063  applyCrsPadding(*padding, verbose);
6064  if (verbose) {
6065  std::ostringstream os;
6066  os << *prefix << "Done computing & applying padding" << endl;
6067  std::cerr << os.str ();
6068  }
6069  }
6070 
6071  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6072  // reasonable meaning, whether or not the matrix is fill complete.
6073  // It's just more work to implement.
6074 
6075  // We are not checking the value of the CombineMode input
6076  // argument. For CrsGraph, we only support import/export
6077  // operations if fillComplete has not yet been called. Any
6078  // incoming column-indices are inserted into the target graph. In
6079  // this context, CombineMode values of ADD vs INSERT are
6080  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6081  // duplicate column-index is inserted, it will be compressed out
6082  // when fillComplete is called.
6083  //
6084  // Note: I think REPLACE means that an existing row is replaced by
6085  // the imported row, i.e., the existing indices are cleared. CGB,
6086  // 6/17/2010
6087 
6088  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6089  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6090  std::runtime_error, ": importLIDs.extent(0) = "
6091  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6092  << numPacketsPerLID.extent (0) << ".");
6093  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6094  (isFillComplete (), std::runtime_error,
6095  ": Import or Export operations are not allowed on a target "
6096  "CrsGraph that is fillComplete.");
6097 
6098  const size_t numImportLIDs(importLIDs.extent(0));
6099  if (numPacketsPerLID.need_sync_host()) {
6100  numPacketsPerLID.sync_host();
6101  }
6102  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6103  if (imports.need_sync_host()) {
6104  imports.sync_host();
6105  }
6106  auto imports_h = imports.view_host();
6107  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6108  auto importLIDs_h = importLIDs.view_host();
6109 
6110  // If we're inserting in local indices, let's pre-allocate
6111  Teuchos::Array<LO> lclColInds;
6112  if (isLocallyIndexed()) {
6113  if (verbose) {
6114  std::ostringstream os;
6115  os << *prefix << "Preallocate local indices scratch" << endl;
6116  std::cerr << os.str();
6117  }
6118  size_t maxNumInserts = 0;
6119  for (size_t i = 0; i < numImportLIDs; ++i) {
6120  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6121  }
6122  if (verbose) {
6123  std::ostringstream os;
6124  os << *prefix << "Local indices scratch size: "
6125  << maxNumInserts << endl;
6126  std::cerr << os.str();
6127  }
6128  lclColInds.resize (maxNumInserts);
6129  }
6130  else {
6131  if (verbose) {
6132  std::ostringstream os;
6133  os << *prefix;
6134  if (isGloballyIndexed()) {
6135  os << "Graph is globally indexed";
6136  }
6137  else {
6138  os << "Graph is neither locally nor globally indexed";
6139  }
6140  os << endl;
6141  std::cerr << os.str();
6142  }
6143  }
6144 
6145  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6146  const map_type& rowMap = *rowMap_;
6147 
6148  try {
6149  size_t importsOffset = 0;
6150  for (size_t i = 0; i < numImportLIDs; ++i) {
6151  if (verbose) {
6152  std::ostringstream os;
6153  os << *prefix << "i=" << i << ", numImportLIDs="
6154  << numImportLIDs << endl;
6155  std::cerr << os.str();
6156  }
6157  // We can only unpack into owned rows, since we only have
6158  // local row indices.
6159  const LO lclRow = importLIDs_h[i];
6160  const GO gblRow = rowMap.getGlobalElement(lclRow);
6161  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6162  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6163  std::logic_error, "importLIDs[i=" << i << "]="
6164  << lclRow << " is not in the row Map on the calling "
6165  "process.");
6166  const LO numEnt = numPacketsPerLID_h[i];
6167  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6168  imports_h.data() + importsOffset;
6169  if (! isLocallyIndexed()) {
6170  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6171  }
6172  else {
6173  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6174  // column Map filtering.
6175  for (LO j = 0; j < numEnt; j++) {
6176  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6177  }
6178  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6179  }
6180  importsOffset += numEnt;
6181  }
6182  }
6183  catch (std::exception& e) {
6184  TEUCHOS_TEST_FOR_EXCEPTION
6185  (true, std::runtime_error,
6186  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6187  "exception: " << endl << e.what());
6188  }
6189 
6190  if (verbose) {
6191  std::ostringstream os;
6192  os << *prefix << "Done" << endl;
6193  std::cerr << os.str();
6194  }
6195  }
6196 
6197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6198  void
6200  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6201  {
6202  using Teuchos::Comm;
6203  using Teuchos::null;
6204  using Teuchos::ParameterList;
6205  using Teuchos::RCP;
6206 
6207  // We'll set all the state "transactionally," so that this method
6208  // satisfies the strong exception guarantee. This object's state
6209  // won't be modified until the end of this method.
6210  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6211  RCP<import_type> importer;
6212  RCP<export_type> exporter;
6213 
6214  rowMap = newMap;
6215  RCP<const Comm<int> > newComm =
6216  (newMap.is_null ()) ? null : newMap->getComm ();
6217 
6218  if (! domainMap_.is_null ()) {
6219  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6220  // Common case: original domain and row Maps are identical.
6221  // In that case, we need only replace the original domain Map
6222  // with the new Map. This ensures that the new domain and row
6223  // Maps _stay_ identical.
6224  domainMap = newMap;
6225  } else {
6226  domainMap = domainMap_->replaceCommWithSubset (newComm);
6227  }
6228  }
6229  if (! rangeMap_.is_null ()) {
6230  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6231  // Common case: original range and row Maps are identical. In
6232  // that case, we need only replace the original range Map with
6233  // the new Map. This ensures that the new range and row Maps
6234  // _stay_ identical.
6235  rangeMap = newMap;
6236  } else {
6237  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6238  }
6239  }
6240  if (! colMap_.is_null ()) {
6241  colMap = colMap_->replaceCommWithSubset (newComm);
6242  }
6243 
6244  // (Re)create the Export and / or Import if necessary.
6245  if (! newComm.is_null ()) {
6246  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6247  //
6248  // The operations below are collective on the new communicator.
6249  //
6250  // (Re)create the Export object if necessary. If I haven't
6251  // called fillComplete yet, I don't have a rangeMap, so I must
6252  // first check if the _original_ rangeMap is not null. Ditto
6253  // for the Import object and the domain Map.
6254  if (! rangeMap_.is_null () &&
6255  rangeMap != rowMap &&
6256  ! rangeMap->isSameAs (*rowMap)) {
6257  if (params.is_null () || ! params->isSublist ("Export")) {
6258  exporter = rcp (new export_type (rowMap, rangeMap));
6259  }
6260  else {
6261  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6262  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6263  }
6264  }
6265  // (Re)create the Import object if necessary.
6266  if (! domainMap_.is_null () &&
6267  domainMap != colMap &&
6268  ! domainMap->isSameAs (*colMap)) {
6269  if (params.is_null () || ! params->isSublist ("Import")) {
6270  importer = rcp (new import_type (domainMap, colMap));
6271  } else {
6272  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6273  importer = rcp (new import_type (domainMap, colMap, importSublist));
6274  }
6275  }
6276  } // if newComm is not null
6277 
6278  // Defer side effects until the end. If no destructors throw
6279  // exceptions (they shouldn't anyway), then this method satisfies
6280  // the strong exception guarantee.
6281  exporter_ = exporter;
6282  importer_ = importer;
6283  rowMap_ = rowMap;
6284  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6285  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6286  // the same object. We might want to get rid of this redundant
6287  // pointer sometime, but for now, we'll leave it alone and just
6288  // set map_ to the same object.
6289  this->map_ = rowMap;
6290  domainMap_ = domainMap;
6291  rangeMap_ = rangeMap;
6292  colMap_ = colMap;
6293  }
6294 
6295  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6296  void
6298  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6299  {
6300  using std::endl;
6301  using LO = LocalOrdinal;
6302  using GO = GlobalOrdinal;
6303  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6304  const bool verbose = verbose_;
6305 
6306  std::unique_ptr<std::string> prefix;
6307  if (verbose) {
6308  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6309  std::ostringstream os;
6310  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6311  << endl;
6312  std::cerr << os.str();
6313  }
6314 
6315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6316  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6317  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6318  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6319  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6320  std::invalid_argument, "offsets.extent(0) = " <<
6321  offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6322 
6323  const map_type& rowMap = * (this->getRowMap ());
6324  const map_type& colMap = * (this->getColMap ());
6325 
6326  // We only use these in debug mode, but since debug mode is a
6327  // run-time option, they need to exist here. That's why we create
6328  // the vector with explicit size zero, to avoid overhead if debug
6329  // mode is off.
6330  bool allRowMapDiagEntriesInColMap = true;
6331  bool allDiagEntriesFound = true;
6332  bool allOffsetsCorrect = true;
6333  bool noOtherWeirdness = true;
6334  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6335  wrong_offsets_type wrongOffsets(0);
6336 
6337  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6338  // the subset of Map functionality that we need below.
6339  auto lclRowMap = rowMap.getLocalMap ();
6340  auto lclColMap = colMap.getLocalMap ();
6341 
6342  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6343  // setup, at least on the host. For CUDA, we have to use LocalMap
6344  // (that comes from each of the two Maps).
6345 
6346  const bool sorted = this->isSorted ();
6347  if (isFillComplete ()) {
6348  auto lclGraph = this->getLocalGraphDevice ();
6349  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6350  lclGraph.row_map,
6351  lclGraph.entries, sorted);
6352  }
6353  else {
6354  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6355  // since the graph is not fill complete. The previous version
6356  // of this code assumed UVM; this version does not.
6357  auto offsets_h = Kokkos::create_mirror_view (offsets);
6358 
6359  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6360  // Find the diagonal entry. Since the row Map and column Map
6361  // may differ, we have to compare global row and column
6362  // indices, not local.
6363  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6364  const GO gblColInd = gblRowInd;
6365  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6366 
6367  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6368  allRowMapDiagEntriesInColMap = false;
6369  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6370  }
6371  else {
6372  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6373  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6374  rowInfo.numEntries > 0) {
6375 
6376  auto colInds = this->getLocalIndsViewHost (rowInfo);
6377  const size_t hint = 0; // not needed for this algorithm
6378  const size_t offset =
6379  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6380  lclColInd, hint, sorted);
6381  offsets_h(lclRowInd) = offset;
6382 
6383  if (debug_) {
6384  // Now that we have what we think is an offset, make sure
6385  // that it really does point to the diagonal entry. Offsets
6386  // are _relative_ to each row, not absolute (for the whole
6387  // (local) graph).
6388  typename local_inds_dualv_type::t_host::const_type lclColInds;
6389  try {
6390  lclColInds = this->getLocalIndsViewHost (rowInfo);
6391  }
6392  catch (...) {
6393  noOtherWeirdness = false;
6394  }
6395  // Don't continue with error checking if the above failed.
6396  if (noOtherWeirdness) {
6397  const size_t numEnt = lclColInds.extent (0);
6398  if (offset >= numEnt) {
6399  // Offsets are relative to each row, so this means that
6400  // the offset is out of bounds.
6401  allOffsetsCorrect = false;
6402  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6403  } else {
6404  const LO actualLclColInd = lclColInds(offset);
6405  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6406  if (actualGblColInd != gblColInd) {
6407  allOffsetsCorrect = false;
6408  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6409  }
6410  }
6411  }
6412  } // debug_
6413  }
6414  else { // either row is empty, or something went wrong w/ getRowInfo()
6415  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6416  allDiagEntriesFound = false;
6417  }
6418  } // whether lclColInd is a valid local column index
6419  } // for each local row
6420  // DEEP_COPY REVIEW - NOT TESTED
6421  Kokkos::deep_copy (offsets, offsets_h);
6422  } // whether the graph is fill complete
6423 
6424  if (verbose && wrongOffsets.size () != 0) {
6425  std::ostringstream os;
6426  os << *prefix << "Wrong offsets: [";
6427  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6428  os << "(" << wrongOffsets[k].first << ","
6429  << wrongOffsets[k].second << ")";
6430  if (k + 1 < wrongOffsets.size ()) {
6431  os << ", ";
6432  }
6433  }
6434  os << "]" << endl;
6435  std::cerr << os.str();
6436  }
6437 
6438  if (debug_) {
6439  using Teuchos::reduceAll;
6440  using std::endl;
6441  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6442  const bool localSuccess =
6443  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6444  const int numResults = 5;
6445  int lclResults[5];
6446  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6447  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6448  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6449  lclResults[3] = noOtherWeirdness ? 1 : 0;
6450  // min-all-reduce will compute least rank of all the processes
6451  // that didn't succeed.
6452  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6453 
6454  int gblResults[5];
6455  gblResults[0] = 0;
6456  gblResults[1] = 0;
6457  gblResults[2] = 0;
6458  gblResults[3] = 0;
6459  gblResults[4] = 0;
6460  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6461  numResults, lclResults, gblResults);
6462 
6463  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6464  || gblResults[3] != 1) {
6465  std::ostringstream os; // build error message
6466  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6467  "possibly among others): " << endl;
6468  if (gblResults[0] == 0) {
6469  os << " - The column Map does not contain at least one diagonal entry "
6470  "of the graph." << endl;
6471  }
6472  if (gblResults[1] == 0) {
6473  os << " - On one or more processes, some row does not contain a "
6474  "diagonal entry." << endl;
6475  }
6476  if (gblResults[2] == 0) {
6477  os << " - On one or more processes, some offsets are incorrect."
6478  << endl;
6479  }
6480  if (gblResults[3] == 0) {
6481  os << " - One or more processes had some other error."
6482  << endl;
6483  }
6484  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6485  }
6486  } // debug_
6487  }
6488 
6489  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6490  void
6492  getLocalOffRankOffsets (offset_device_view_type& offsets) const
6493  {
6494  using std::endl;
6495  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6496  const bool verbose = verbose_;
6497 
6498  std::unique_ptr<std::string> prefix;
6499  if (verbose) {
6500  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6501  std::ostringstream os;
6502  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6503  << endl;
6504  std::cerr << os.str();
6505  }
6506 
6507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6508  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6509  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6510 
6511  const size_t lclNumRows = this->getLocalNumRows ();
6512 
6513  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6514  offsets = k_offRankOffsets_;
6515  return;
6516  }
6517  haveLocalOffRankOffsets_ = false;
6518 
6519  const map_type& colMap = * (this->getColMap ());
6520  const map_type& domMap = * (this->getDomainMap ());
6521 
6522  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6523  // the subset of Map functionality that we need below.
6524  auto lclColMap = colMap.getLocalMap ();
6525  auto lclDomMap = domMap.getLocalMap ();
6526 
6527  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6528  // setup, at least on the host. For CUDA, we have to use LocalMap
6529  // (that comes from each of the two Maps).
6530 
6531  TEUCHOS_ASSERT(this->isSorted ());
6532  if (isFillComplete ()) {
6533  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6534  auto lclGraph = this->getLocalGraphDevice ();
6535  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6536  lclColMap, lclDomMap,
6537  lclGraph);
6538  offsets = k_offRankOffsets_;
6539  haveLocalOffRankOffsets_ = true;
6540  } else {
6541  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6542  (true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6543  }
6544  }
6545 
6546  namespace { // (anonymous)
6547 
6548  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6549  // below). The point is to avoid the deep copy between the input
6550  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6551  // can't use UVM to avoid the deep copy with CUDA, because the
6552  // ArrayRCP is a host pointer, while the input to the graph's
6553  // getLocalDiagOffsets method is a device pointer. Assigning a
6554  // host pointer to a device pointer is incorrect unless the host
6555  // pointer points to host pinned memory. The goal is to get rid
6556  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6557  // copy for backwards compatibility.
6558  //
6559  // We have to use template magic because
6560  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6561  // if device_type::memory_space is not Kokkos::HostSpace (as is
6562  // the case with CUDA).
6563 
6564  template<class DeviceType,
6565  const bool memSpaceIsHostSpace =
6566  std::is_same<typename DeviceType::memory_space,
6567  Kokkos::HostSpace>::value>
6568  struct HelpGetLocalDiagOffsets {};
6569 
6570  template<class DeviceType>
6571  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6572  typedef DeviceType device_type;
6573  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6574  Kokkos::MemoryUnmanaged> device_offsets_type;
6575  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6576  Kokkos::MemoryUnmanaged> host_offsets_type;
6577 
6578  static device_offsets_type
6579  getDeviceOffsets (const host_offsets_type& hostOffsets)
6580  {
6581  // Host and device are the same; no need to allocate a
6582  // temporary device View.
6583  return hostOffsets;
6584  }
6585 
6586  static void
6587  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6588  const device_offsets_type& /* deviceOffsets */)
6589  { /* copy back not needed; host and device are the same */ }
6590  };
6591 
6592  template<class DeviceType>
6593  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6594  typedef DeviceType device_type;
6595  // We have to do a deep copy, since host memory space != device
6596  // memory space. Thus, the device View is managed (we need to
6597  // allocate a temporary device View).
6598  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6599  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6600  Kokkos::MemoryUnmanaged> host_offsets_type;
6601 
6602  static device_offsets_type
6603  getDeviceOffsets (const host_offsets_type& hostOffsets)
6604  {
6605  // Host memory space != device memory space, so we must
6606  // allocate a temporary device View for the graph.
6607  return device_offsets_type ("offsets", hostOffsets.extent (0));
6608  }
6609 
6610  static void
6611  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6612  const device_offsets_type& deviceOffsets)
6613  {
6614  // DEEP_COPY REVIEW - NOT TESTED
6615  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6616  }
6617  };
6618  } // namespace (anonymous)
6619 
6620 
6621