Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
45 
50 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
51 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52 #include "Tpetra_Details_makeColMap.hpp"
56 #include "Tpetra_Distributor.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "Tpetra_Vector.hpp"
59 #include "Tpetra_Import_Util.hpp"
60 #include "Tpetra_Import_Util2.hpp"
61 #include "Tpetra_Details_packCrsGraph.hpp"
62 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63 #include "Tpetra_Details_CrsPadding.hpp"
64 #include "Tpetra_Util.hpp"
65 #include <algorithm>
66 #include <limits>
67 #include <map>
68 #include <sstream>
69 #include <string>
70 #include <type_traits>
71 #include <utility>
72 #include <vector>
73 
74 namespace Tpetra {
75  namespace Details {
76  namespace Impl {
77 
78  template<class MapIter>
79  void
80  verbosePrintMap(std::ostream& out,
81  MapIter beg,
82  MapIter end,
83  const size_t numEnt,
84  const char mapName[])
85  {
86  using ::Tpetra::Details::Behavior;
88 
89  out << mapName << ": {";
90  const size_t maxNumToPrint =
92  if (maxNumToPrint == 0) {
93  if (numEnt != 0) {
94  out << "...";
95  }
96  }
97  else {
98  const size_t numToPrint = numEnt > maxNumToPrint ?
99  maxNumToPrint : numEnt;
100  size_t count = 0;
101  for (MapIter it = beg; it != end; ++it) {
102  out << "(" << (*it).first << ", ";
103  verbosePrintArray(out, (*it).second, "gblColInds",
104  maxNumToPrint);
105  out << ")";
106  if (count + size_t(1) < numToPrint) {
107  out << ", ";
108  }
109  ++count;
110  }
111  if (count < numEnt) {
112  out << ", ...";
113  }
114  }
115  out << "}";
116  }
117 
118  template<class LO, class GO, class Node>
119  Teuchos::ArrayView<GO>
120  getRowGraphGlobalRow(
121  std::vector<GO>& gblColIndsStorage,
122  const RowGraph<LO, GO, Node>& graph,
123  const GO gblRowInd)
124  {
125  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126  if (gblColIndsStorage.size() < origNumEnt) {
127  gblColIndsStorage.resize(origNumEnt);
128  }
129  typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130  origNumEnt);
131  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133  return retval;
134  }
135 
136  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137  class ConvertColumnIndicesFromGlobalToLocal {
138  public:
139  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140  const ::Kokkos::View<const GO*, DT>& gblColInds,
141  const ::Kokkos::View<const OffsetType*, DT>& ptr,
142  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144  lclColInds_ (lclColInds),
145  gblColInds_ (gblColInds),
146  ptr_ (ptr),
147  lclColMap_ (lclColMap),
148  numRowEnt_ (numRowEnt)
149  {}
150 
151  KOKKOS_FUNCTION void
152  operator () (const LO& lclRow, OffsetType& curNumBad) const
153  {
154  const OffsetType offset = ptr_(lclRow);
155  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156  // of entries in a row to LO, as long as the row doesn't have
157  // too many duplicate entries.
158  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159  for (LO j = 0; j < numEnt; ++j) {
160  const GO gid = gblColInds_(offset + j);
161  const LO lid = lclColMap_.getLocalElement (gid);
162  lclColInds_(offset + j) = lid;
163  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164  ++curNumBad;
165  }
166  }
167  }
168 
169  static OffsetType
170  run (const ::Kokkos::View<LO*, DT>& lclColInds,
171  const ::Kokkos::View<const GO*, DT>& gblColInds,
172  const ::Kokkos::View<const OffsetType*, DT>& ptr,
173  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175  {
176  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178 
179  const LO lclNumRows = ptr.extent (0) == 0 ?
180  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181  OffsetType numBad = 0;
182  // Count of "bad" column indices is a reduction over rows.
183  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184  functor_type (lclColInds, gblColInds, ptr,
185  lclColMap, numRowEnt),
186  numBad);
187  return numBad;
188  }
189 
190  private:
191  ::Kokkos::View<LO*, DT> lclColInds_;
192  ::Kokkos::View<const GO*, DT> gblColInds_;
193  ::Kokkos::View<const OffsetType*, DT> ptr_;
195  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196  };
197 
198  } // namespace Impl
199 
214  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215  OffsetType
216  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217  const Kokkos::View<const GO*, DT>& gblColInds,
218  const Kokkos::View<const OffsetType*, DT>& ptr,
219  const LocalMap<LO, GO, DT>& lclColMap,
220  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221  {
222  using Impl::ConvertColumnIndicesFromGlobalToLocal;
223  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225  }
226 
227  template<class ViewType, class LO>
228  class MaxDifference {
229  public:
230  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231 
232  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233  dst = 0;
234  }
235 
236  KOKKOS_INLINE_FUNCTION void
237  join (LO& dst, const LO& src) const
238  {
239  dst = (src > dst) ? src : dst;
240  }
241 
242  KOKKOS_INLINE_FUNCTION void
243  operator () (const LO lclRow, LO& maxNumEnt) const
244  {
245  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247  }
248  private:
249  typename ViewType::const_type ptr_;
250  };
251 
252  template<class ViewType, class LO>
253  typename ViewType::non_const_value_type
254  maxDifference (const char kernelLabel[],
255  const ViewType& ptr,
256  const LO lclNumRows)
257  {
258  if (lclNumRows == 0) {
259  // mfh 07 May 2018: Weirdly, I need this special case,
260  // otherwise I get the wrong answer.
261  return static_cast<LO> (0);
262  }
263  else {
264  using execution_space = typename ViewType::execution_space;
265  using range_type = Kokkos::RangePolicy<execution_space, LO>;
266  LO theMaxNumEnt {0};
267  Kokkos::parallel_reduce (kernelLabel,
268  range_type (0, lclNumRows),
269  MaxDifference<ViewType, LO> (ptr),
270  theMaxNumEnt);
271  return theMaxNumEnt;
272  }
273  }
274 
275  } // namespace Details
276 
277  template <class LocalOrdinal, class GlobalOrdinal, class Node>
278  bool
279  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
280  getDebug() {
281  return Details::Behavior::debug("CrsGraph");
282  }
283 
284  template <class LocalOrdinal, class GlobalOrdinal, class Node>
285  bool
286  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
287  getVerbose() {
288  return Details::Behavior::verbose("CrsGraph");
289  }
290 
291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
292  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
293  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294  const size_t maxNumEntriesPerRow,
295  const Teuchos::RCP<Teuchos::ParameterList>& params) :
296  dist_object_type (rowMap)
297  , rowMap_ (rowMap)
298  , numAllocForAllRows_ (maxNumEntriesPerRow)
299  {
300  const char tfecfFuncName[] =
301  "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
302  staticAssertions ();
303  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
304  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
305  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
306  "a valid size_t value, which in this case means it must not be "
307  "Teuchos::OrdinalTraits<size_t>::invalid().");
308  resumeFill (params);
310  }
311 
312  template <class LocalOrdinal, class GlobalOrdinal, class Node>
314  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
315  const Teuchos::RCP<const map_type>& colMap,
316  const size_t maxNumEntriesPerRow,
317  const Teuchos::RCP<Teuchos::ParameterList>& params) :
318  dist_object_type (rowMap)
319  , rowMap_ (rowMap)
320  , colMap_ (colMap)
321  , numAllocForAllRows_ (maxNumEntriesPerRow)
322  {
323  const char tfecfFuncName[] =
324  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
325  staticAssertions ();
326  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
327  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
328  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
329  "a valid size_t value, which in this case means it must not be "
330  "Teuchos::OrdinalTraits<size_t>::invalid().");
331  resumeFill (params);
333  }
334 
335 
336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
338  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
339  const Teuchos::ArrayView<const size_t>& numEntPerRow,
340  const Teuchos::RCP<Teuchos::ParameterList>& params) :
341  dist_object_type (rowMap)
342  , rowMap_ (rowMap)
343  , numAllocForAllRows_ (0)
344  {
345  const char tfecfFuncName[] =
346  "CrsGraph(rowMap,numEntPerRow,params): ";
347  staticAssertions ();
348 
349  const size_t lclNumRows = rowMap.is_null () ?
350  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
351  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
352  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
353  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
354  << " != the local number of rows " << lclNumRows << " as specified by "
355  "the input row Map.");
356 
357  if (debug_) {
358  for (size_t r = 0; r < lclNumRows; ++r) {
359  const size_t curRowCount = numEntPerRow[r];
360  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
361  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
362  std::invalid_argument, "numEntPerRow(" << r << ") "
363  "specifies an invalid number of entries "
364  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
365  }
366  }
367 
368  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
369  // The latter is a const View, so we have to copy into a nonconst
370  // View first, then assign.
371  typedef decltype (k_numAllocPerRow_) out_view_type;
372  typedef typename out_view_type::non_const_type nc_view_type;
373  typedef Kokkos::View<const size_t*,
374  typename nc_view_type::array_layout,
375  Kokkos::HostSpace,
376  Kokkos::MemoryUnmanaged> in_view_type;
377  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
378  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
379  lclNumRows);
380  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
381  using exec_space = typename nc_view_type::execution_space;
382  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
383  k_numAllocPerRow_ = numAllocPerRowOut;
384 
385  resumeFill (params);
387  }
388 
389 
390 
391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
393  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395  const Teuchos::RCP<Teuchos::ParameterList>& params) :
396  dist_object_type (rowMap)
397  , rowMap_ (rowMap)
398  , k_numAllocPerRow_ (numEntPerRow.h_view)
399  , numAllocForAllRows_ (0)
400  {
401  const char tfecfFuncName[] =
402  "CrsGraph(rowMap,numEntPerRow,params): ";
403  staticAssertions ();
404 
405  const size_t lclNumRows = rowMap.is_null () ?
406  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
407  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
408  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
409  std::invalid_argument, "numEntPerRow has length " <<
410  numEntPerRow.extent (0) << " != the local number of rows " <<
411  lclNumRows << " as specified by " "the input row Map.");
412 
413  if (debug_) {
414  for (size_t r = 0; r < lclNumRows; ++r) {
415  const size_t curRowCount = numEntPerRow.h_view(r);
416  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
417  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
418  std::invalid_argument, "numEntPerRow(" << r << ") "
419  "specifies an invalid number of entries "
420  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
421  }
422  }
423 
424  resumeFill (params);
426  }
427 
428 
429  template <class LocalOrdinal, class GlobalOrdinal, class Node>
431  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
432  const Teuchos::RCP<const map_type>& colMap,
433  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
434  const Teuchos::RCP<Teuchos::ParameterList>& params) :
435  dist_object_type (rowMap)
436  , rowMap_ (rowMap)
437  , colMap_ (colMap)
438  , k_numAllocPerRow_ (numEntPerRow.h_view)
439  , numAllocForAllRows_ (0)
440  {
441  const char tfecfFuncName[] =
442  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
443  staticAssertions ();
444 
445  const size_t lclNumRows = rowMap.is_null () ?
446  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
447  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
448  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
449  std::invalid_argument, "numEntPerRow has length " <<
450  numEntPerRow.extent (0) << " != the local number of rows " <<
451  lclNumRows << " as specified by " "the input row Map.");
452 
453  if (debug_) {
454  for (size_t r = 0; r < lclNumRows; ++r) {
455  const size_t curRowCount = numEntPerRow.h_view(r);
456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
458  std::invalid_argument, "numEntPerRow(" << r << ") "
459  "specifies an invalid number of entries "
460  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
461  }
462  }
463 
464  resumeFill (params);
466  }
467 
468 
469  template <class LocalOrdinal, class GlobalOrdinal, class Node>
471  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
472  const Teuchos::RCP<const map_type>& colMap,
473  const Teuchos::ArrayView<const size_t>& numEntPerRow,
474  const Teuchos::RCP<Teuchos::ParameterList>& params) :
475  dist_object_type (rowMap)
476  , rowMap_ (rowMap)
477  , colMap_ (colMap)
478  , numAllocForAllRows_ (0)
479  {
480  const char tfecfFuncName[] =
481  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
482  staticAssertions ();
483 
484  const size_t lclNumRows = rowMap.is_null () ?
485  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
487  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
488  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
489  << " != the local number of rows " << lclNumRows << " as specified by "
490  "the input row Map.");
491 
492  if (debug_) {
493  for (size_t r = 0; r < lclNumRows; ++r) {
494  const size_t curRowCount = numEntPerRow[r];
495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
496  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
497  std::invalid_argument, "numEntPerRow(" << r << ") "
498  "specifies an invalid number of entries "
499  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
500  }
501  }
502 
503  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
504  // The latter is a const View, so we have to copy into a nonconst
505  // View first, then assign.
506  typedef decltype (k_numAllocPerRow_) out_view_type;
507  typedef typename out_view_type::non_const_type nc_view_type;
508  typedef Kokkos::View<const size_t*,
509  typename nc_view_type::array_layout,
510  Kokkos::HostSpace,
511  Kokkos::MemoryUnmanaged> in_view_type;
512  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
513  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
514  lclNumRows);
515  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
516  using exec_space = typename nc_view_type::execution_space;
517  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
518  k_numAllocPerRow_ = numAllocPerRowOut;
519 
520  resumeFill (params);
522  }
523 
524 
525  template <class LocalOrdinal, class GlobalOrdinal, class Node>
528  const Teuchos::RCP<const map_type>& rowMap,
529  const Teuchos::RCP<Teuchos::ParameterList>& params) :
530  dist_object_type (rowMap)
531  , rowMap_(rowMap)
532  , colMap_(originalGraph.colMap_)
533  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
534  , storageStatus_(originalGraph.storageStatus_)
535  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
536  , indicesAreLocal_(originalGraph.indicesAreLocal_)
537  , indicesAreSorted_(originalGraph.indicesAreSorted_)
538  {
539  staticAssertions();
540 
541  int numRows = rowMap->getLocalNumElements();
542  size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
543  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
544 
545 
546  this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
547  this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
548 
549  if (indicesAreLocal_) {
550  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
551  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
552  }
553  else {
554  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
555  }
556 
558  }
559 
560  template <class LocalOrdinal, class GlobalOrdinal, class Node>
562  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
563  const Teuchos::RCP<const map_type>& colMap,
564  const typename local_graph_device_type::row_map_type& rowPointers,
565  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
566  const Teuchos::RCP<Teuchos::ParameterList>& params) :
567  dist_object_type (rowMap)
568  , rowMap_(rowMap)
569  , colMap_(colMap)
570  , numAllocForAllRows_(0)
571  , storageStatus_(Details::STORAGE_1D_PACKED)
572  , indicesAreAllocated_(true)
573  , indicesAreLocal_(true)
574  {
575  staticAssertions ();
576  if (! params.is_null() && params->isParameter("sorted") &&
577  ! params->get<bool>("sorted")) {
578  indicesAreSorted_ = false;
579  }
580  else {
581  indicesAreSorted_ = true;
582  }
583  setAllIndices (rowPointers, columnIndices);
585  }
586 
587  template <class LocalOrdinal, class GlobalOrdinal, class Node>
589  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
590  const Teuchos::RCP<const map_type>& colMap,
591  const Teuchos::ArrayRCP<size_t>& rowPointers,
592  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
593  const Teuchos::RCP<Teuchos::ParameterList>& params) :
594  dist_object_type (rowMap)
595  , rowMap_ (rowMap)
596  , colMap_ (colMap)
597  , numAllocForAllRows_ (0)
598  , storageStatus_ (Details::STORAGE_1D_PACKED)
599  , indicesAreAllocated_ (true)
600  , indicesAreLocal_ (true)
601  {
602  staticAssertions ();
603  if (! params.is_null() && params->isParameter("sorted") &&
604  ! params->get<bool>("sorted")) {
605  indicesAreSorted_ = false;
606  }
607  else {
608  indicesAreSorted_ = true;
609  }
610  setAllIndices (rowPointers, columnIndices);
612  }
613 
614  template <class LocalOrdinal, class GlobalOrdinal, class Node>
616  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
617  const Teuchos::RCP<const map_type>& colMap,
618  const local_graph_device_type& k_local_graph_,
619  const Teuchos::RCP<Teuchos::ParameterList>& params)
620  : CrsGraph (k_local_graph_,
621  rowMap,
622  colMap,
623  Teuchos::null,
624  Teuchos::null,
625  params)
626  {}
627 
628  template <class LocalOrdinal, class GlobalOrdinal, class Node>
630  CrsGraph (const local_graph_device_type& k_local_graph_,
631  const Teuchos::RCP<const map_type>& rowMap,
632  const Teuchos::RCP<const map_type>& colMap,
633  const Teuchos::RCP<const map_type>& domainMap,
634  const Teuchos::RCP<const map_type>& rangeMap,
635  const Teuchos::RCP<Teuchos::ParameterList>& params)
636  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
637  , rowMap_ (rowMap)
638  , colMap_ (colMap)
639  , numAllocForAllRows_ (0)
640  , storageStatus_ (Details::STORAGE_1D_PACKED)
641  , indicesAreAllocated_ (true)
642  , indicesAreLocal_ (true)
643  {
644  staticAssertions();
645  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
646 
647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
648  colMap.is_null (), std::runtime_error,
649  ": The input column Map must be nonnull.");
650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
651  k_local_graph_.numRows () != rowMap->getLocalNumElements (),
652  std::runtime_error,
653  ": The input row Map and the input local graph need to have the same "
654  "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
655  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
656  << " row(s).");
657 
658  // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
659  // rowMap_->getLocalNumElements(), but it doesn't have to.
660  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
661  // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
662  // ": The input row Map and the input local graph need to have the same "
663  // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
664  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
666  lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
667  ": cannot have 1D data structures allocated.");
668 
669  if(! params.is_null() && params->isParameter("sorted") &&
670  ! params->get<bool>("sorted")) {
671  indicesAreSorted_ = false;
672  }
673  else {
674  indicesAreSorted_ = true;
675  }
676 
677  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
678  rangeMap .is_null() ? rowMap_ : rangeMap);
679  Teuchos::Array<int> remotePIDs (0); // unused output argument
680  this->makeImportExport (remotePIDs, false);
681 
682  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
684  this->setRowPtrs(k_local_graph_.row_map);
685 
686  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
687 
688  const bool callComputeGlobalConstants = params.get () == nullptr ||
689  params->get ("compute global constants", true);
690 
691  if (callComputeGlobalConstants) {
692  this->computeGlobalConstants ();
693  }
694  this->fillComplete_ = true;
695  this->checkInternalState ();
696  }
697 
698  template <class LocalOrdinal, class GlobalOrdinal, class Node>
701  const Teuchos::RCP<const map_type>& rowMap,
702  const Teuchos::RCP<const map_type>& colMap,
703  const Teuchos::RCP<const map_type>& domainMap,
704  const Teuchos::RCP<const map_type>& rangeMap,
705  const Teuchos::RCP<const import_type>& importer,
706  const Teuchos::RCP<const export_type>& exporter,
707  const Teuchos::RCP<Teuchos::ParameterList>& params) :
708  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
709  rowMap_ (rowMap),
710  colMap_ (colMap),
711  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
712  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
713  importer_ (importer),
714  exporter_ (exporter),
715  numAllocForAllRows_ (0),
716  storageStatus_ (Details::STORAGE_1D_PACKED),
717  indicesAreAllocated_ (true),
718  indicesAreLocal_ (true)
719  {
720  staticAssertions();
721  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
722  "Map,Map,Map,Map,Import,Export,params): ";
723 
724  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
725  (colMap.is_null (), std::runtime_error,
726  "The input column Map must be nonnull.");
727 
728  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
730  setRowPtrs(lclGraph.row_map);
731 
732  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
733 
734  if (! params.is_null() && params->isParameter("sorted") &&
735  ! params->get<bool>("sorted")) {
736  indicesAreSorted_ = false;
737  }
738  else {
739  indicesAreSorted_ = true;
740  }
741 
742  const bool callComputeGlobalConstants =
743  params.get () == nullptr ||
744  params->get ("compute global constants", true);
745  if (callComputeGlobalConstants) {
746  this->computeGlobalConstants ();
747  }
748  fillComplete_ = true;
750  }
751 
752  template <class LocalOrdinal, class GlobalOrdinal, class Node>
753  Teuchos::RCP<const Teuchos::ParameterList>
756  {
757  using Teuchos::RCP;
758  using Teuchos::ParameterList;
759  using Teuchos::parameterList;
760 
761  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
762 
763  // Make a sublist for the Import.
764  RCP<ParameterList> importSublist = parameterList ("Import");
765 
766  // FIXME (mfh 02 Apr 2012) We should really have the Import and
767  // Export objects fill in these lists. However, we don't want to
768  // create an Import or Export unless we need them. For now, we
769  // know that the Import and Export just pass the list directly to
770  // their Distributor, so we can create a Distributor here
771  // (Distributor's constructor is a lightweight operation) and have
772  // it fill in the list.
773 
774  // Fill in Distributor default parameters by creating a
775  // Distributor and asking it to do the work.
776  Distributor distributor (rowMap_->getComm (), importSublist);
777  params->set ("Import", *importSublist, "How the Import performs communication.");
778 
779  // Make a sublist for the Export. For now, it's a clone of the
780  // Import sublist. It's not a shallow copy, though, since we
781  // might like the Import to do communication differently than the
782  // Export.
783  params->set ("Export", *importSublist, "How the Export performs communication.");
784 
785  return params;
786  }
787 
788  template <class LocalOrdinal, class GlobalOrdinal, class Node>
789  void
791  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
792  {
793  Teuchos::RCP<const Teuchos::ParameterList> validParams =
794  getValidParameters ();
795  params->validateParametersAndSetDefaults (*validParams);
796  this->setMyParamList (params);
797  }
798 
799  template <class LocalOrdinal, class GlobalOrdinal, class Node>
803  {
804  return rowMap_->getGlobalNumElements ();
805  }
806 
807  template <class LocalOrdinal, class GlobalOrdinal, class Node>
811  {
812  const char tfecfFuncName[] = "getGlobalNumCols: ";
813  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
814  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
815  "The graph does not have a domain Map. You may not call this method in "
816  "that case.");
817  return getDomainMap ()->getGlobalNumElements ();
818  }
819 
820 
821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
822  size_t
825  {
826  return this->rowMap_.is_null () ?
827  static_cast<size_t> (0) :
828  this->rowMap_->getLocalNumElements ();
829  }
830 
831 
832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
833  size_t
836  {
837  const char tfecfFuncName[] = "getLocalNumCols: ";
838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
839  ! hasColMap (), std::runtime_error,
840  "The graph does not have a column Map. You may not call this method "
841  "unless the graph has a column Map. This requires either that a custom "
842  "column Map was given to the constructor, or that fillComplete() has "
843  "been called.");
844  return colMap_.is_null () ? static_cast<size_t> (0) :
845  colMap_->getLocalNumElements ();
846  }
847 
848 
849 
850  template <class LocalOrdinal, class GlobalOrdinal, class Node>
851  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
853  getRowMap () const
854  {
855  return rowMap_;
856  }
857 
858  template <class LocalOrdinal, class GlobalOrdinal, class Node>
859  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
861  getColMap () const
862  {
863  return colMap_;
864  }
865 
866  template <class LocalOrdinal, class GlobalOrdinal, class Node>
867  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
870  {
871  return domainMap_;
872  }
873 
874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
875  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
877  getRangeMap () const
878  {
879  return rangeMap_;
880  }
881 
882  template <class LocalOrdinal, class GlobalOrdinal, class Node>
883  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
885  getImporter () const
886  {
887  return importer_;
888  }
889 
890  template <class LocalOrdinal, class GlobalOrdinal, class Node>
891  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
893  getExporter () const
894  {
895  return exporter_;
896  }
897 
898  template <class LocalOrdinal, class GlobalOrdinal, class Node>
899  bool
901  hasColMap () const
902  {
903  return ! colMap_.is_null ();
904  }
905 
906  template <class LocalOrdinal, class GlobalOrdinal, class Node>
907  bool
910  {
911  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
912  // getLocalNumRows() is zero?
913 
914  const bool isOpt = indicesAreAllocated_ &&
915  k_numRowEntries_.extent (0) == 0 &&
916  getLocalNumRows () > 0;
917 
918  return isOpt;
919  }
920 
921 
922  template <class LocalOrdinal, class GlobalOrdinal, class Node>
926  {
927  const char tfecfFuncName[] = "getGlobalNumEntries: ";
928  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
929  (! this->haveGlobalConstants_, std::logic_error,
930  "The graph does not have global constants computed, "
931  "but the user has requested them.");
932 
933  return globalNumEntries_;
934  }
935 
936 
937  template <class LocalOrdinal, class GlobalOrdinal, class Node>
938  size_t
941  {
942  const char tfecfFuncName[] = "getLocalNumEntries: ";
943  typedef LocalOrdinal LO;
944 
945  if (this->indicesAreAllocated_) {
946  const LO lclNumRows = this->getLocalNumRows ();
947  if (lclNumRows == 0) {
948  return static_cast<size_t> (0);
949  }
950  else {
951  // Avoid the "*this capture" issue by creating a local Kokkos::View.
952  auto numEntPerRow = this->k_numRowEntries_;
953  const LO numNumEntPerRow = numEntPerRow.extent (0);
954  if (numNumEntPerRow == 0) {
955  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
956  static_cast<LO> (lclNumRows + 1)) {
957  return static_cast<size_t> (0);
958  }
959  else {
960  // indices are allocated and k_numRowEntries_ is not allocated,
961  // so we have packed storage and the length of lclIndsPacked_wdv
962  // must be the number of local entries.
963  if(debug_) {
964  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
965  (this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
966  "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
967  }
968  return lclIndsPacked_wdv.extent(0);
969  }
970  }
971  else { // k_numRowEntries_ is populated
972  // k_numRowEntries_ is actually be a host View, so we run
973  // the sum in its native execution space. This also means
974  // that we can use explicit capture (which could perhaps
975  // improve build time) instead of KOKKOS_LAMBDA, and avoid
976  // any CUDA build issues with trying to run a __device__ -
977  // only function on host.
978  typedef typename num_row_entries_type::execution_space
979  host_exec_space;
980  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
981 
982  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
983  lclNumRows :
984  numNumEntPerRow;
985  size_t nodeNumEnt = 0;
986  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
987  range_type (0, upperLoopBound),
988  [=] (const LO& k, size_t& lclSum) {
989  lclSum += numEntPerRow(k);
990  }, nodeNumEnt);
991  return nodeNumEnt;
992  }
993  }
994  }
995  else { // nothing allocated on this process, so no entries
996  return static_cast<size_t> (0);
997  }
998  }
999 
1000  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1004  {
1005  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1007  (! this->haveGlobalConstants_, std::logic_error,
1008  "The graph does not have global constants computed, "
1009  "but the user has requested them.");
1010 
1011  return globalMaxNumRowEntries_;
1012  }
1013 
1014  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1015  size_t
1018  {
1019  return nodeMaxNumRowEntries_;
1020  }
1021 
1022  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1023  bool
1026  {
1027  return fillComplete_;
1028  }
1029 
1030  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031  bool
1034  {
1035  return ! fillComplete_;
1036  }
1037 
1038 
1039  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040  bool
1043  {
1044  return indicesAreLocal_;
1045  }
1046 
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  bool
1051  {
1052  return indicesAreGlobal_;
1053  }
1054 
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  size_t
1059  {
1060  typedef LocalOrdinal LO;
1061 
1062  if (this->indicesAreAllocated_) {
1063  const LO lclNumRows = this->getLocalNumRows ();
1064  if (lclNumRows == 0) {
1065  return static_cast<size_t> (0);
1066  }
1067  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1068  if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
1069  static_cast<LO> (lclNumRows + 1)) {
1070  return static_cast<size_t> (0);
1071  }
1072  else {
1073  if(this->isLocallyIndexed())
1074  return lclIndsPacked_wdv.extent(0);
1075  else
1076  return gblInds_wdv.extent(0);
1077  }
1078  }
1079  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1080  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1081  if (rowPtrsUnpacked_host.extent (0) == 0) {
1082  return static_cast<size_t> (0);
1083  }
1084  else {
1085  if(this->isLocallyIndexed())
1086  return lclIndsUnpacked_wdv.extent(0);
1087  else
1088  return gblInds_wdv.extent(0);
1089  }
1090  }
1091  else {
1092  return static_cast<size_t> (0);
1093  }
1094  }
1095  else {
1096  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1097  }
1098  }
1099 
1100  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1101  Teuchos::RCP<const Teuchos::Comm<int> >
1103  getComm () const
1104  {
1105  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1106  }
1107 
1108  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1109  GlobalOrdinal
1112  {
1113  return rowMap_->getIndexBase ();
1114  }
1115 
1116  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1117  bool
1119  indicesAreAllocated () const
1120  {
1121  return indicesAreAllocated_;
1122  }
1123 
1124  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1125  bool
1127  isSorted () const
1128  {
1129  return indicesAreSorted_;
1130  }
1131 
1132  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1133  bool
1135  isMerged () const
1136  {
1137  return noRedundancies_;
1138  }
1139 
1140  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1141  void
1144  {
1145  // FIXME (mfh 07 May 2013) How do we know that the change
1146  // introduced a redundancy, or even that it invalidated the sorted
1147  // order of indices? CrsGraph has always made this conservative
1148  // guess. It could be a bit costly to check at insertion time,
1149  // though.
1150  indicesAreSorted_ = false;
1151  noRedundancies_ = false;
1152 
1153  // We've modified the graph, so we'll have to recompute local
1154  // constants like the number of diagonal entries on this process.
1155  haveLocalConstants_ = false;
1156  }
1157 
1158  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1159  void
1161  allocateIndices (const ELocalGlobal lg, const bool verbose)
1162  {
1164  using Teuchos::arcp;
1165  using Teuchos::Array;
1166  using Teuchos::ArrayRCP;
1167  using std::endl;
1168  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1169  typedef typename local_graph_device_type::row_map_type::non_const_type
1170  non_const_row_map_type;
1171  const char tfecfFuncName[] = "allocateIndices: ";
1172  const char suffix[] =
1173  " Please report this bug to the Tpetra developers.";
1174  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1175 
1176  std::unique_ptr<std::string> prefix;
1177  if (verbose) {
1178  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1179  std::ostringstream os;
1180  os << *prefix << "Start: lg="
1181  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1182  << ", numRows: " << this->getLocalNumRows() << endl;
1183  std::cerr << os.str();
1184  }
1185 
1186  // This is a protected function, only callable by us. If it was
1187  // called incorrectly, it is our fault. That's why the tests
1188  // below throw std::logic_error instead of std::invalid_argument.
1189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1190  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1191  ": The graph is locally indexed, but Tpetra code is calling "
1192  "this method with lg=GlobalIndices." << suffix);
1193  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1194  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1195  ": The graph is globally indexed, but Tpetra code is calling "
1196  "this method with lg=LocalIndices." << suffix);
1197  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1198  (indicesAreAllocated (), std::logic_error, ": The graph's "
1199  "indices are already allocated, but Tpetra is calling "
1200  "allocateIndices again." << suffix);
1201  const size_t numRows = this->getLocalNumRows ();
1202 
1203  //
1204  // STATIC ALLOCATION PROFILE
1205  //
1206  size_type numInds = 0;
1207  {
1208  if (verbose) {
1209  std::ostringstream os;
1210  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1211  std::cerr << os.str();
1212  }
1213  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1214 
1215  if (this->k_numAllocPerRow_.extent (0) != 0) {
1216  // It's OK to throw std::invalid_argument here, because we
1217  // haven't incurred any side effects yet. Throwing that
1218  // exception (and not, say, std::logic_error) implies that the
1219  // instance can recover.
1220  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1221  (this->k_numAllocPerRow_.extent (0) != numRows,
1222  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1223  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1224  << ", but its length != numRows = " << numRows << ".");
1225 
1226  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1227  // we want to compute here) lives on device. That's OK;
1228  // computeOffsetsFromCounts can handle this case.
1230 
1231  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1232  // doesn't attempt to check its input for "invalid" flag
1233  // values. For now, we omit that feature of the sequential
1234  // code disabled below.
1235  numInds = computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1236  }
1237  else {
1238  // It's OK to throw std::invalid_argument here, because we
1239  // haven't incurred any side effects yet. Throwing that
1240  // exception (and not, say, std::logic_error) implies that the
1241  // instance can recover.
1242  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1243  (this->numAllocForAllRows_ ==
1244  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1245  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1246  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1247  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1248 
1250  numInds = computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1251  }
1252  // "Commit" the resulting row offsets.
1253  setRowPtrsUnpacked(k_rowPtrs);
1254  }
1255  if(debug_) {
1256  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1257  (numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1258  ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1259  "does not match final entry of rowptrs unpacked");
1260  }
1261 
1262 
1263  if (lg == LocalIndices) {
1264  if (verbose) {
1265  std::ostringstream os;
1266  os << *prefix << "Allocate local column indices "
1267  "lclIndsUnpacked_wdv: " << numInds << endl;
1268  std::cerr << os.str();
1269  }
1270  lclIndsUnpacked_wdv = local_inds_wdv_type (
1271  local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1272  }
1273  else {
1274  if (verbose) {
1275  std::ostringstream os;
1276  os << *prefix << "Allocate global column indices "
1277  "gblInds_wdv: " << numInds << endl;
1278  std::cerr << os.str();
1279  }
1280  gblInds_wdv = global_inds_wdv_type (
1281  global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1282  }
1283  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1284 
1285  this->indicesAreLocal_ = (lg == LocalIndices);
1286  this->indicesAreGlobal_ = (lg == GlobalIndices);
1287 
1288  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1289  using Kokkos::ViewAllocateWithoutInitializing;
1290  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1291  if (verbose) {
1292  std::ostringstream os;
1293  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1294  << endl;
1295  std::cerr << os.str();
1296  }
1297  num_row_entries_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1298  // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1299  Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1300  Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1301  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1302  }
1303 
1304  // Once indices are allocated, CrsGraph needs to free this information.
1305  this->numAllocForAllRows_ = 0;
1306  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1307  this->indicesAreAllocated_ = true;
1308 
1309  try {
1310  this->checkInternalState ();
1311  }
1312  catch (std::logic_error& e) {
1313  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1314  (true, std::logic_error, "At end of allocateIndices, "
1315  "checkInternalState threw std::logic_error: "
1316  << e.what ());
1317  }
1318  catch (std::exception& e) {
1319  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1320  (true, std::runtime_error, "At end of allocateIndices, "
1321  "checkInternalState threw std::exception: "
1322  << e.what ());
1323  }
1324  catch (...) {
1325  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1326  (true, std::runtime_error, "At end of allocateIndices, "
1327  "checkInternalState threw an exception "
1328  "not a subclass of std::exception.");
1329  }
1330 
1331  if (verbose) {
1332  std::ostringstream os;
1333  os << *prefix << "Done" << endl;
1334  std::cerr << os.str();
1335  }
1336  }
1337 
1338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1339  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1340  local_inds_dualv_type::t_host::const_type
1342  getLocalIndsViewHost (const RowInfo& rowinfo) const
1343  {
1344  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1345  return typename local_inds_dualv_type::t_host::const_type ();
1346  else
1347  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1348  rowinfo.allocSize,
1349  Access::ReadOnly);
1350  }
1351 
1352  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1354  local_inds_dualv_type::t_host
1357  {
1358  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1359  return typename local_inds_dualv_type::t_host ();
1360  else
1361  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1362  rowinfo.allocSize,
1363  Access::ReadWrite);
1364  }
1365 
1366  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1368  global_inds_dualv_type::t_host::const_type
1370  getGlobalIndsViewHost (const RowInfo& rowinfo) const
1371  {
1372  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1373  return typename global_inds_dualv_type::t_host::const_type ();
1374  else
1375  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1376  rowinfo.allocSize,
1377  Access::ReadOnly);
1378  }
1379 
1380  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1382  local_inds_dualv_type::t_dev::const_type
1384  getLocalIndsViewDevice (const RowInfo& rowinfo) const
1385  {
1386  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1387  return typename local_inds_dualv_type::t_dev::const_type ();
1388  else
1389  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1390  rowinfo.allocSize,
1391  Access::ReadOnly);
1392  }
1393 
1394  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1396  global_inds_dualv_type::t_dev::const_type
1398  getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1399  {
1400  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1401  return typename global_inds_dualv_type::t_dev::const_type ();
1402  else
1403  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1404  rowinfo.allocSize,
1405  Access::ReadOnly);
1406  }
1407 
1408 
1409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1410  RowInfo
1412  getRowInfo (const LocalOrdinal myRow) const
1413  {
1414  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1415  RowInfo ret;
1416  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1417  ret.localRow = STINV;
1418  ret.allocSize = 0;
1419  ret.numEntries = 0;
1420  ret.offset1D = STINV;
1421  return ret;
1422  }
1423 
1424  ret.localRow = static_cast<size_t> (myRow);
1425  if (this->indicesAreAllocated ()) {
1426  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1427  // Offsets tell us the allocation size in this case.
1428  if (rowPtrsUnpacked_host.extent (0) == 0) {
1429  ret.offset1D = 0;
1430  ret.allocSize = 0;
1431  }
1432  else {
1433  ret.offset1D = rowPtrsUnpacked_host(myRow);
1434  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1435  }
1436 
1437  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1438  ret.allocSize :
1439  this->k_numRowEntries_(myRow);
1440  }
1441  else { // haven't performed allocation yet; probably won't hit this code
1442  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1443  // allocate, rather than doing lazy allocation at first insert.
1444  // This will make k_numAllocPerRow_ obsolete.
1445  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1446  this->k_numAllocPerRow_(myRow) : // this is a host View
1447  this->numAllocForAllRows_;
1448  ret.numEntries = 0;
1449  ret.offset1D = STINV;
1450  }
1451 
1452  return ret;
1453  }
1454 
1455 
1456  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1457  RowInfo
1459  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1460  {
1461  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1462  RowInfo ret;
1463  if (this->rowMap_.is_null ()) {
1464  ret.localRow = STINV;
1465  ret.allocSize = 0;
1466  ret.numEntries = 0;
1467  ret.offset1D = STINV;
1468  return ret;
1469  }
1470  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1471  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1472  ret.localRow = STINV;
1473  ret.allocSize = 0;
1474  ret.numEntries = 0;
1475  ret.offset1D = STINV;
1476  return ret;
1477  }
1478 
1479  ret.localRow = static_cast<size_t> (myRow);
1480  if (this->indicesAreAllocated ()) {
1481  // graph data structures have the info that we need
1482  //
1483  // if static graph, offsets tell us the allocation size
1484  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1485  if (rowPtrsUnpacked_host.extent (0) == 0) {
1486  ret.offset1D = 0;
1487  ret.allocSize = 0;
1488  }
1489  else {
1490  ret.offset1D = rowPtrsUnpacked_host(myRow);
1491  ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1492  }
1493 
1494  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1495  ret.allocSize :
1496  this->k_numRowEntries_(myRow);
1497  }
1498  else { // haven't performed allocation yet; probably won't hit this code
1499  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1500  // allocate, rather than doing lazy allocation at first insert.
1501  // This will make k_numAllocPerRow_ obsolete.
1502  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1503  this->k_numAllocPerRow_(myRow) : // this is a host View
1504  this->numAllocForAllRows_;
1505  ret.numEntries = 0;
1506  ret.offset1D = STINV;
1507  }
1508 
1509  return ret;
1510  }
1511 
1512 
1513  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1514  void
1516  staticAssertions () const
1517  {
1518  using Teuchos::OrdinalTraits;
1519  typedef LocalOrdinal LO;
1520  typedef GlobalOrdinal GO;
1521  typedef global_size_t GST;
1522 
1523  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1524  // This is so that we can store local indices in the memory
1525  // formerly occupied by global indices.
1526  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1527  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1528  // Assumption: max(size_t) >= max(LocalOrdinal)
1529  // This is so that we can represent any LocalOrdinal as a size_t.
1530  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1531  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1532  static_assert (sizeof(GST) >= sizeof(size_t),
1533  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1534 
1535  // FIXME (mfh 30 Sep 2015) We're not using
1536  // Teuchos::CompileTimeAssert any more. Can we do these checks
1537  // with static_assert?
1538 
1539  // can't call max() with CompileTimeAssert, because it isn't a
1540  // constant expression; will need to make this a runtime check
1541  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1542  "given template arguments: size assumptions are not valid.";
1543  TEUCHOS_TEST_FOR_EXCEPTION(
1544  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1545  std::runtime_error, msg);
1546  TEUCHOS_TEST_FOR_EXCEPTION(
1547  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1548  std::runtime_error, msg);
1549  TEUCHOS_TEST_FOR_EXCEPTION(
1550  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1551  std::runtime_error, msg);
1552  TEUCHOS_TEST_FOR_EXCEPTION(
1553  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1554  std::runtime_error, msg);
1555  }
1556 
1557 
1558  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1559  size_t
1562  const SLocalGlobalViews &newInds,
1563  const ELocalGlobal lg,
1564  const ELocalGlobal I)
1565  {
1566  using Teuchos::ArrayView;
1567  typedef LocalOrdinal LO;
1568  typedef GlobalOrdinal GO;
1569  const char tfecfFuncName[] = "insertIndices: ";
1570 
1571  size_t oldNumEnt = 0;
1572  if (debug_) {
1573  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1574  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1575  "lg must be either GlobalIndices or LocalIndices.");
1576  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1577  }
1578 
1579  size_t numNewInds = 0;
1580  if (lg == GlobalIndices) { // input indices are global
1581  ArrayView<const GO> new_ginds = newInds.ginds;
1582  numNewInds = new_ginds.size();
1583  if (I == GlobalIndices) { // store global indices
1584  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1585  if (debug_) {
1586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1587  (static_cast<size_t> (gind_view.size ()) <
1588  rowinfo.numEntries + numNewInds, std::logic_error,
1589  "gind_view.size() = " << gind_view.size ()
1590  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1591  << ") + numNewInds (= " << numNewInds << ").");
1592  }
1593  GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1594  + rowinfo.numEntries;
1595  for (size_t k = 0; k < numNewInds; ++k) {
1596  gblColInds_out[k] = new_ginds[k];
1597  }
1598  }
1599  else if (I == LocalIndices) { // store local indices
1600  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1601  if (debug_) {
1602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1603  (static_cast<size_t> (lind_view.size ()) <
1604  rowinfo.numEntries + numNewInds, std::logic_error,
1605  "lind_view.size() = " << lind_view.size ()
1606  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1607  << ") + numNewInds (= " << numNewInds << ").");
1608  }
1609  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1610  + rowinfo.numEntries;
1611  for (size_t k = 0; k < numNewInds; ++k) {
1612  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1613  }
1614  }
1615  }
1616  else if (lg == LocalIndices) { // input indices are local
1617  ArrayView<const LO> new_linds = newInds.linds;
1618  numNewInds = new_linds.size();
1619  if (I == LocalIndices) { // store local indices
1620  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1621  if (debug_) {
1622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1623  (static_cast<size_t> (lind_view.size ()) <
1624  rowinfo.numEntries + numNewInds, std::logic_error,
1625  "lind_view.size() = " << lind_view.size ()
1626  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1627  << ") + numNewInds (= " << numNewInds << ").");
1628  }
1629  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1630  + rowinfo.numEntries;
1631  for (size_t k = 0; k < numNewInds; ++k) {
1632  lclColInds_out[k] = new_linds[k];
1633  }
1634  }
1635  else if (I == GlobalIndices) {
1636  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1637  (true, std::logic_error, "The case where the input indices are local "
1638  "and the indices to write are global (lg=LocalIndices, I="
1639  "GlobalIndices) is not implemented, because it does not make sense."
1640  << std::endl << "If you have correct local column indices, that "
1641  "means the graph has a column Map. In that case, you should be "
1642  "storing local indices.");
1643  }
1644  }
1645 
1646  rowinfo.numEntries += numNewInds;
1647  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1648  this->setLocallyModified ();
1649 
1650  if (debug_) {
1651  const size_t chkNewNumEnt =
1652  this->getNumEntriesInLocalRow (rowinfo.localRow);
1653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1654  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1655  "chkNewNumEnt = " << chkNewNumEnt
1656  << " != oldNumEnt (= " << oldNumEnt
1657  << ") + numNewInds (= " << numNewInds << ").");
1658  }
1659 
1660  return numNewInds;
1661  }
1662 
1663  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1664  size_t
1666  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1667  const GlobalOrdinal inputGblColInds[],
1668  const size_t numInputInds)
1669  {
1670  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1671  inputGblColInds, numInputInds);
1672  }
1673 
1674  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1675  size_t
1678  const GlobalOrdinal inputGblColInds[],
1679  const size_t numInputInds,
1680  std::function<void(const size_t, const size_t, const size_t)> fun)
1681  {
1683  using Kokkos::View;
1684  using Kokkos::subview;
1685  using Kokkos::MemoryUnmanaged;
1686  using Teuchos::ArrayView;
1687  using LO = LocalOrdinal;
1688  using GO = GlobalOrdinal;
1689  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1690  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1691 
1692  auto numEntries = rowInfo.numEntries;
1693  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1694  inp_view_type inputInds(inputGblColInds, numInputInds);
1695  size_t numInserted;
1696  {
1697  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1698  numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1699  gblIndsHostView,
1700  numEntries, inputInds, fun);
1701  }
1702 
1703  const bool insertFailed =
1704  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1705  if(insertFailed) {
1706  constexpr size_t ONE (1);
1707  const int myRank = this->getComm()->getRank();
1708  std::ostringstream os;
1709 
1710  os << "Proc " << myRank << ": Not enough capacity to insert "
1711  << numInputInds
1712  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1713  << " into local row " << lclRow << ", which currently has "
1714  << rowInfo.numEntries
1715  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1716  << " and total allocation size " << rowInfo.allocSize
1717  << ". ";
1718  const size_t maxNumToPrint =
1720  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1721  numInputInds);
1722  verbosePrintArray(os, inputGblColIndsView, "Input global "
1723  "column indices", maxNumToPrint);
1724  os << ", ";
1725  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1726  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1727  rowInfo.numEntries);
1728  verbosePrintArray(os, curGblColIndsView, "Current global "
1729  "column indices", maxNumToPrint);
1730  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1731  (true, std::runtime_error, os.str());
1732  }
1733 
1734  this->k_numRowEntries_(lclRow) += numInserted;
1735 
1736  this->setLocallyModified();
1737  return numInserted;
1738  }
1739 
1740 
1741  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1742  void
1744  insertLocalIndicesImpl (const LocalOrdinal myRow,
1745  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1746  std::function<void(const size_t, const size_t, const size_t)> fun)
1747  {
1748  using Kokkos::MemoryUnmanaged;
1749  using Kokkos::subview;
1750  using Kokkos::View;
1751  using LO = LocalOrdinal;
1752  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1753 
1754  const RowInfo rowInfo = this->getRowInfo(myRow);
1755 
1756  size_t numNewInds = 0;
1757  size_t newNumEntries = 0;
1758 
1759  auto numEntries = rowInfo.numEntries;
1760  // Note: Teuchos::ArrayViews are in HostSpace
1761  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1762  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1763  size_t numInserted = 0;
1764  {
1765  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1766  numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1767  numEntries, inputInds, fun);
1768  }
1769 
1770  const bool insertFailed =
1771  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1772  if(insertFailed) {
1773  constexpr size_t ONE (1);
1774  const size_t numInputInds(indices.size());
1775  const int myRank = this->getComm()->getRank();
1776  std::ostringstream os;
1777  os << "On MPI Process " << myRank << ": Not enough capacity to "
1778  "insert " << numInputInds
1779  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1780  << " into local row " << myRow << ", which currently has "
1781  << rowInfo.numEntries
1782  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1783  << " and total allocation size " << rowInfo.allocSize << ".";
1784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1785  (true, std::runtime_error, os.str());
1786  }
1787  numNewInds = numInserted;
1788  newNumEntries = rowInfo.numEntries + numNewInds;
1789 
1790  this->k_numRowEntries_(myRow) += numNewInds;
1791  this->setLocallyModified ();
1792 
1793  if (debug_) {
1794  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1796  (chkNewNumEntries != newNumEntries, std::logic_error,
1797  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1798  << " != newNumEntries = " << newNumEntries
1799  << ". Please report this bug to the Tpetra developers.");
1800  }
1801  }
1802 
1803  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1804  size_t
1807  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1808  std::function<void(const size_t, const size_t, const size_t)> fun) const
1809  {
1810  using GO = GlobalOrdinal;
1811  using Kokkos::View;
1812  using Kokkos::MemoryUnmanaged;
1813  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1814 
1815  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1816  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1817 
1818  size_t numFound = 0;
1819  LocalOrdinal lclRow = rowInfo.localRow;
1820  if (this->isLocallyIndexed())
1821  {
1822  if (this->colMap_.is_null())
1823  return invalidCount;
1824  const auto& colMap = *(this->colMap_);
1825  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1826  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1827  rowInfo.numEntries,
1828  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1829  }
1830  else if (this->isGloballyIndexed())
1831  {
1832  numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1833  rowInfo.numEntries,
1834  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1835  }
1836  return numFound;
1837  }
1838 
1839 
1840  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1841  size_t
1844  const bool sorted,
1845  const bool merged)
1846  {
1847  const size_t origNumEnt = rowInfo.numEntries;
1848  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1849  origNumEnt != 0) {
1850  auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1851 
1852  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1853  if (! sorted) {
1854  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1855  }
1856 
1857  if (! merged) {
1858  LocalOrdinal* const beg = lclColIndsRaw;
1859  LocalOrdinal* const end = beg + rowInfo.numEntries;
1860  LocalOrdinal* const newend = std::unique (beg, end);
1861  const size_t newNumEnt = newend - beg;
1862 
1863  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1864  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1865  return origNumEnt - newNumEnt; // the number of duplicates in the row
1866  }
1867  else {
1868  return static_cast<size_t> (0); // assume no duplicates
1869  }
1870  }
1871  else {
1872  return static_cast<size_t> (0); // no entries in the row
1873  }
1874  }
1875 
1876 
1877  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1878  void
1880  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1881  const Teuchos::RCP<const map_type>& rangeMap)
1882  {
1883  // simple pointer comparison for equality
1884  if (domainMap_ != domainMap) {
1885  domainMap_ = domainMap;
1886  importer_ = Teuchos::null;
1887  }
1888  if (rangeMap_ != rangeMap) {
1889  rangeMap_ = rangeMap;
1890  exporter_ = Teuchos::null;
1891  }
1892  }
1893 
1894 
1895  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1896  void
1899  {
1900  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1901 
1902  globalNumEntries_ = INV;
1903  globalMaxNumRowEntries_ = INV;
1904  haveGlobalConstants_ = false;
1905  }
1906 
1907 
1908  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1909  void
1912  {
1913  if (debug_) {
1914  using std::endl;
1915  const char tfecfFuncName[] = "checkInternalState: ";
1916  const char suffix[] = " Please report this bug to the Tpetra developers.";
1917 
1918  std::unique_ptr<std::string> prefix;
1919  if (verbose_) {
1920  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1921  std::ostringstream os;
1922  os << *prefix << "Start" << endl;
1923  std::cerr << os.str();
1924  }
1925 
1926  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1927  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1928  // check the internal state of this data structure
1929  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1930  // always remains in a valid state
1931 
1932  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1933  (this->rowMap_.is_null (), std::logic_error,
1934  "Row Map is null." << suffix);
1935  // This may access the row Map, so we need to check first (above)
1936  // whether the row Map is null.
1937  const LocalOrdinal lclNumRows =
1938  static_cast<LocalOrdinal> (this->getLocalNumRows ());
1939 
1940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1941  (this->isFillActive () == this->isFillComplete (), std::logic_error,
1942  "Graph cannot be both fill active and fill complete." << suffix);
1943  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1944  (this->isFillComplete () &&
1945  (this->colMap_.is_null () ||
1946  this->rangeMap_.is_null () ||
1947  this->domainMap_.is_null ()),
1948  std::logic_error,
1949  "Graph is full complete, but at least one of {column, range, domain} "
1950  "Map is null." << suffix);
1951  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1952  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1953  std::logic_error, "Storage is optimized, but indices are not "
1954  "allocated, not even trivially." << suffix);
1955 
1956  size_t nodeAllocSize = 0;
1957  try {
1958  nodeAllocSize = this->getLocalAllocationSize ();
1959  }
1960  catch (std::logic_error& e) {
1961  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1962  (true, std::runtime_error, "getLocalAllocationSize threw "
1963  "std::logic_error: " << e.what ());
1964  }
1965  catch (std::exception& e) {
1966  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1967  (true, std::runtime_error, "getLocalAllocationSize threw an "
1968  "std::exception: " << e.what ());
1969  }
1970  catch (...) {
1971  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1972  (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1973  "not a subclass of std::exception.");
1974  }
1975 
1976  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1977  (this->isStorageOptimized () &&
1978  nodeAllocSize != this->getLocalNumEntries (),
1979  std::logic_error, "Storage is optimized, but "
1980  "this->getLocalAllocationSize() = " << nodeAllocSize
1981  << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
1982  << "." << suffix);
1983  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1984  (! this->haveGlobalConstants_ &&
1985  (this->globalNumEntries_ != GSTI ||
1986  this->globalMaxNumRowEntries_ != GSTI),
1987  std::logic_error, "Graph claims not to have global constants, but "
1988  "some of the global constants are not marked as invalid." << suffix);
1989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1990  (this->haveGlobalConstants_ &&
1991  (this->globalNumEntries_ == GSTI ||
1992  this->globalMaxNumRowEntries_ == GSTI),
1993  std::logic_error, "Graph claims to have global constants, but "
1994  "some of them are marked as invalid." << suffix);
1995  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1996  (this->haveGlobalConstants_ &&
1997  (this->globalNumEntries_ < this->getLocalNumEntries () ||
1998  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1999  std::logic_error, "Graph claims to have global constants, and "
2000  "all of the values of the global constants are valid, but "
2001  "some of the local constants are greater than "
2002  "their corresponding global constants." << suffix);
2003  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2004  (this->indicesAreAllocated () &&
2005  (this->numAllocForAllRows_ != 0 ||
2006  this->k_numAllocPerRow_.extent (0) != 0),
2007  std::logic_error, "The graph claims that its indices are allocated, but "
2008  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2009  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2010  "the graph is supposed to release its \"allocation specifications\" "
2011  "when it allocates its indices." << suffix);
2012  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
2013  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
2014  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2015  (rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
2016  std::logic_error, "The host and device views of k_rowPtrs_ have "
2017  "different sizes; rowPtrsUnpacked_host_ has size "
2018  << rowPtrsUnpacked_host.extent(0)
2019  << ", but rowPtrsUnpacked_dev_ has size "
2020  << rowPtrsUnpacked_dev.extent(0)
2021  << "." << suffix);
2022  if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
2023  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2024  (size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
2025  std::logic_error, "The graph is globally indexed and "
2026  "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host.extent(0)
2027  << ", but that size does not equal lclNumRows+1 = "
2028  << (lclNumRows+1) << "." << suffix);
2029  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2030  (rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2031  std::logic_error, "The graph is globally indexed and "
2032  "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host.extent(0)
2033  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2034  << rowPtrsUnpacked_host(lclNumRows)
2035  << " != gblInds_wdv.extent(0)="
2036  << gblInds_wdv.extent(0) << "." << suffix);
2037  }
2038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2039  (this->isLocallyIndexed () &&
2040  rowPtrsUnpacked_host.extent (0) != 0 &&
2041  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2042  static_cast<size_t> (lclNumRows + 1) ||
2043  rowPtrsUnpacked_host(lclNumRows) !=
2044  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2045  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2046  "the graph is locally indexed, then "
2047  "k_rowPtrs_ must have N+1 rows, and "
2048  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2049 
2050  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2051  (this->indicesAreAllocated () &&
2052  nodeAllocSize > 0 &&
2053  this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2054  this->gblInds_wdv.extent (0) == 0,
2055  std::logic_error, "Graph is allocated nontrivially, but "
2056  "but 1-D allocations are not present." << suffix);
2057 
2058  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2059  (! this->indicesAreAllocated () &&
2060  ((rowPtrsUnpacked_host.extent (0) != 0 ||
2061  this->k_numRowEntries_.extent (0) != 0) ||
2062  this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2063  this->gblInds_wdv.extent (0) != 0),
2064  std::logic_error, "If indices are not allocated, "
2065  "then none of the buffers should be." << suffix);
2066  // indices may be local or global only if they are allocated
2067  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2068  // indicesAreGlobal_)
2069  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2070  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2071  ! this->indicesAreAllocated_,
2072  std::logic_error, "Indices may be local or global only if they are "
2073  "allocated." << suffix);
2074  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2075  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2076  std::logic_error, "Indices may not be both local and global." << suffix);
2077  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2078  (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2079  std::logic_error, "Indices are local, but "
2080  "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2081  << ") != 0. In other words, if indices are local, then "
2082  "allocations of global indices should not be present."
2083  << suffix);
2084  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2085  (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2086  std::logic_error, "Indices are global, but "
2087  "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2088  << ") != 0. In other words, if indices are global, "
2089  "then allocations for local indices should not be present."
2090  << suffix);
2091  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2092  (indicesAreLocal_ && nodeAllocSize > 0 &&
2093  lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2094  std::logic_error, "Indices are local and "
2095  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2096  "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2097  << getLocalNumRows () << " > 0." << suffix);
2098  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2099  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2100  gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2101  std::logic_error, "Indices are global and "
2102  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2103  "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2104  << getLocalNumRows () << " > 0." << suffix);
2105  // check the actual allocations
2106  if (this->indicesAreAllocated () &&
2107  rowPtrsUnpacked_host.extent (0) != 0) {
2108  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2109  (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2110  this->getLocalNumRows () + 1,
2111  std::logic_error, "Indices are allocated and "
2112  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2113  << rowPtrsUnpacked_host.extent (0) << " != getLocalNumRows()+1 = "
2114  << (this->getLocalNumRows () + 1) << "." << suffix);
2115  const size_t actualNumAllocated =
2116  rowPtrsUnpacked_host(this->getLocalNumRows());
2117  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2118  (this->isLocallyIndexed () &&
2119  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2120  std::logic_error, "Graph is locally indexed, indices are "
2121  "are allocated, and k_rowPtrs_ has nonzero length, but "
2122  "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2123  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2125  (this->isGloballyIndexed () &&
2126  static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2127  std::logic_error, "Graph is globally indexed, indices "
2128  "are allocated, and k_rowPtrs_ has nonzero length, but "
2129  "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2130  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2131  }
2132 
2133  if (verbose_) {
2134  std::ostringstream os;
2135  os << *prefix << "Done" << endl;
2136  std::cerr << os.str();
2137  }
2138  }
2139  }
2140 
2141 
2142  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2143  size_t
2145  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2146  {
2147  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2148  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2149  return Teuchos::OrdinalTraits<size_t>::invalid ();
2150  }
2151  else {
2152  return rowInfo.numEntries;
2153  }
2154  }
2155 
2156 
2157  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2158  size_t
2160  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2161  {
2162  const RowInfo rowInfo = this->getRowInfo (localRow);
2163  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2164  return Teuchos::OrdinalTraits<size_t>::invalid ();
2165  }
2166  else {
2167  return rowInfo.numEntries;
2168  }
2169  }
2170 
2171 
2172  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2173  size_t
2175  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2176  {
2177  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2178  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2179  return Teuchos::OrdinalTraits<size_t>::invalid ();
2180  }
2181  else {
2182  return rowInfo.allocSize;
2183  }
2184  }
2185 
2186 
2187  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2188  size_t
2190  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2191  {
2192  const RowInfo rowInfo = this->getRowInfo (localRow);
2193  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2194  return Teuchos::OrdinalTraits<size_t>::invalid ();
2195  }
2196  else {
2197  return rowInfo.allocSize;
2198  }
2199  }
2200 
2201 
2202  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2203  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2206  {
2207  return getRowPtrsPackedHost();
2208  }
2209 
2210  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2211  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2214  {
2215  return getRowPtrsPackedDevice();
2216  }
2217 
2218 
2219  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2220  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2223  {
2224  return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2225  }
2226 
2227  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2231  {
2232  return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2233  }
2234 
2235  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2236  void
2238  getLocalRowCopy (LocalOrdinal localRow,
2239  nonconst_local_inds_host_view_type & indices,
2240  size_t& numEntries) const
2241  {
2242  using Teuchos::ArrayView;
2243  const char tfecfFuncName[] = "getLocalRowCopy: ";
2244 
2245  TEUCHOS_TEST_FOR_EXCEPTION(
2246  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2247  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2248  "does not have a column Map yet. That means we don't have local indices "
2249  "for columns yet, so it doesn't make sense to call this method. If the "
2250  "graph doesn't have a column Map yet, you should call fillComplete on "
2251  "it first.");
2252 
2253  // This does the right thing (reports an empty row) if the input
2254  // row is invalid.
2255  const RowInfo rowinfo = this->getRowInfo (localRow);
2256  // No side effects on error.
2257  const size_t theNumEntries = rowinfo.numEntries;
2258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2259  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2260  "Specified storage (size==" << indices.size () << ") does not suffice "
2261  "to hold all " << theNumEntries << " entry/ies for this row.");
2262  numEntries = theNumEntries;
2263 
2264  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2265  if (isLocallyIndexed ()) {
2266  auto lclInds = getLocalIndsViewHost(rowinfo);
2267  for (size_t j = 0; j < theNumEntries; ++j) {
2268  indices[j] = lclInds(j);
2269  }
2270  }
2271  else if (isGloballyIndexed ()) {
2272  auto gblInds = getGlobalIndsViewHost(rowinfo);
2273  for (size_t j = 0; j < theNumEntries; ++j) {
2274  indices[j] = colMap_->getLocalElement (gblInds(j));
2275  }
2276  }
2277  }
2278  }
2279 
2280 
2281  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2282  void
2284  getGlobalRowCopy (GlobalOrdinal globalRow,
2285  nonconst_global_inds_host_view_type &indices,
2286  size_t& numEntries) const
2287  {
2288  using Teuchos::ArrayView;
2289  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2290 
2291  // This does the right thing (reports an empty row) if the input
2292  // row is invalid.
2293  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2294  const size_t theNumEntries = rowinfo.numEntries;
2295  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2296  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2297  "Specified storage (size==" << indices.size () << ") does not suffice "
2298  "to hold all " << theNumEntries << " entry/ies for this row.");
2299  numEntries = theNumEntries; // first side effect
2300 
2301  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2302  if (isLocallyIndexed ()) {
2303  auto lclInds = getLocalIndsViewHost(rowinfo);
2304  for (size_t j = 0; j < theNumEntries; ++j) {
2305  indices[j] = colMap_->getGlobalElement (lclInds(j));
2306  }
2307  }
2308  else if (isGloballyIndexed ()) {
2309  auto gblInds = getGlobalIndsViewHost(rowinfo);
2310  for (size_t j = 0; j < theNumEntries; ++j) {
2311  indices[j] = gblInds(j);
2312  }
2313  }
2314  }
2315  }
2316 
2317 
2318  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2319  void
2322  const LocalOrdinal localRow,
2323  local_inds_host_view_type &indices) const
2324  {
2325  const char tfecfFuncName[] = "getLocalRowView: ";
2326 
2327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2328  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2329  "currently stored as global indices, so we cannot return a view with "
2330  "local column indices, whether or not the graph has a column Map. If "
2331  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2332 
2333  const RowInfo rowInfo = getRowInfo (localRow);
2334  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2335  rowInfo.numEntries > 0) {
2336  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2337  rowInfo.numEntries,
2338  Access::ReadOnly);
2339  }
2340  else {
2341  // This does the right thing (reports an empty row) if the input
2342  // row is invalid.
2343  indices = local_inds_host_view_type();
2344  }
2345 
2346  if (debug_) {
2347  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2348  (static_cast<size_t> (indices.size ()) !=
2349  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2350  "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2351  localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2352  ". Please report this bug to the Tpetra developers.");
2353  }
2354  }
2355 
2356 
2357  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2358  void
2361  const GlobalOrdinal globalRow,
2362  global_inds_host_view_type &indices) const
2363  {
2364  const char tfecfFuncName[] = "getGlobalRowView: ";
2365 
2366  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2367  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2368  "currently stored as local indices, so we cannot return a view with "
2369  "global column indices. Use getGlobalRowCopy() instead.");
2370 
2371  // This does the right thing (reports an empty row) if the input
2372  // row is invalid.
2373  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2374  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2375  rowInfo.numEntries > 0) {
2376  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2377  rowInfo.numEntries,
2378  Access::ReadOnly);
2379  }
2380  else {
2381  indices = typename global_inds_dualv_type::t_host::const_type();
2382  }
2383  if (debug_) {
2384  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2385  (static_cast<size_t> (indices.size ()) !=
2386  getNumEntriesInGlobalRow (globalRow),
2387  std::logic_error, "indices.size() = " << indices.extent(0)
2388  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2389  << getNumEntriesInGlobalRow (globalRow)
2390  << ". Please report this bug to the Tpetra developers.");
2391  }
2392  }
2393 
2394 
2395  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2396  void
2398  insertLocalIndices (const LocalOrdinal localRow,
2399  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2400  {
2401  const char tfecfFuncName[] = "insertLocalIndices: ";
2402 
2403  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2404  (! isFillActive (), std::runtime_error, "Fill must be active.");
2405  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2406  (isGloballyIndexed (), std::runtime_error,
2407  "Graph indices are global; use insertGlobalIndices().");
2408  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2409  (! hasColMap (), std::runtime_error,
2410  "Cannot insert local indices without a column Map.");
2411  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2412  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2413  "Local row index " << localRow << " is not in the row Map "
2414  "on the calling process.");
2415  if (! indicesAreAllocated ()) {
2416  allocateIndices (LocalIndices, verbose_);
2417  }
2418 
2419  if (debug_) {
2420  // In debug mode, if the graph has a column Map, test whether any
2421  // of the given column indices are not in the column Map. Keep
2422  // track of the invalid column indices so we can tell the user
2423  // about them.
2424  if (hasColMap ()) {
2425  using Teuchos::Array;
2426  using Teuchos::toString;
2427  using std::endl;
2428  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2429 
2430  const map_type& colMap = *colMap_;
2431  Array<LocalOrdinal> badColInds;
2432  bool allInColMap = true;
2433  for (size_type k = 0; k < indices.size (); ++k) {
2434  if (! colMap.isNodeLocalElement (indices[k])) {
2435  allInColMap = false;
2436  badColInds.push_back (indices[k]);
2437  }
2438  }
2439  if (! allInColMap) {
2440  std::ostringstream os;
2441  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2442  "entries in owned row " << localRow << ", at the following column "
2443  "indices: " << toString (indices) << "." << endl;
2444  os << "Of those, the following indices are not in the column Map on "
2445  "this process: " << toString (badColInds) << "." << endl << "Since "
2446  "the graph has a column Map already, it is invalid to insert entries "
2447  "at those locations.";
2448  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2449  }
2450  }
2451  }
2452 
2453  insertLocalIndicesImpl (localRow, indices);
2454 
2455  if (debug_) {
2456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2457  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2458  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2459  "! isLocallyIndexed() is true. Please report this bug to the "
2460  "Tpetra developers.");
2461  }
2462  }
2463 
2464  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2465  void
2467  insertLocalIndices (const LocalOrdinal localRow,
2468  const LocalOrdinal numEnt,
2469  const LocalOrdinal inds[])
2470  {
2471  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2472  this->insertLocalIndices (localRow, indsT);
2473  }
2474 
2475 
2476  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2477  void
2479  insertGlobalIndices (const GlobalOrdinal gblRow,
2480  const LocalOrdinal numInputInds,
2481  const GlobalOrdinal inputGblColInds[])
2482  {
2483  typedef LocalOrdinal LO;
2484  const char tfecfFuncName[] = "insertGlobalIndices: ";
2485 
2486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2487  (this->isLocallyIndexed (), std::runtime_error,
2488  "graph indices are local; use insertLocalIndices().");
2489  // This can't really be satisfied for now, because if we are
2490  // fillComplete(), then we are local. In the future, this may
2491  // change. However, the rule that modification require active
2492  // fill will not change.
2493  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2494  (! this->isFillActive (), std::runtime_error,
2495  "You are not allowed to call this method if fill is not active. "
2496  "If fillComplete has been called, you must first call resumeFill "
2497  "before you may insert indices.");
2498  if (! indicesAreAllocated ()) {
2499  allocateIndices (GlobalIndices, verbose_);
2500  }
2501  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2502  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2503  if (debug_) {
2504  if (this->hasColMap ()) {
2505  using std::endl;
2506  const map_type& colMap = * (this->colMap_);
2507  // In a debug build, keep track of the nonowned ("bad") column
2508  // indices, so that we can display them in the exception
2509  // message. In a release build, just ditch the loop early if
2510  // we encounter a nonowned column index.
2511  std::vector<GlobalOrdinal> badColInds;
2512  bool allInColMap = true;
2513  for (LO k = 0; k < numInputInds; ++k) {
2514  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2515  allInColMap = false;
2516  badColInds.push_back (inputGblColInds[k]);
2517  }
2518  }
2519  if (! allInColMap) {
2520  std::ostringstream os;
2521  os << "You attempted to insert entries in owned row " << gblRow
2522  << ", at the following column indices: [";
2523  for (LO k = 0; k < numInputInds; ++k) {
2524  os << inputGblColInds[k];
2525  if (k + static_cast<LO> (1) < numInputInds) {
2526  os << ",";
2527  }
2528  }
2529  os << "]." << endl << "Of those, the following indices are not in "
2530  "the column Map on this process: [";
2531  for (size_t k = 0; k < badColInds.size (); ++k) {
2532  os << badColInds[k];
2533  if (k + size_t (1) < badColInds.size ()) {
2534  os << ",";
2535  }
2536  }
2537  os << "]." << endl << "Since the matrix has a column Map already, "
2538  "it is invalid to insert entries at those locations.";
2539  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2540  (true, std::invalid_argument, os.str ());
2541  }
2542  }
2543  } // debug_
2544  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2545  }
2546  else { // a nonlocal row
2547  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2548  numInputInds);
2549  }
2550  }
2551 
2552 
2553  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2554  void
2556  insertGlobalIndices (const GlobalOrdinal gblRow,
2557  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2558  {
2559  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2560  inputGblColInds.getRawPtr ());
2561  }
2562 
2563 
2564  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2565  void
2567  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2568  const GlobalOrdinal gblColInds[],
2569  const LocalOrdinal numGblColInds)
2570  {
2571  typedef LocalOrdinal LO;
2572  typedef GlobalOrdinal GO;
2573  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2574 
2575  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2576  (this->isLocallyIndexed (), std::runtime_error,
2577  "Graph indices are local; use insertLocalIndices().");
2578  // This can't really be satisfied for now, because if we are
2579  // fillComplete(), then we are local. In the future, this may
2580  // change. However, the rule that modification require active
2581  // fill will not change.
2582  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2583  (! this->isFillActive (), std::runtime_error,
2584  "You are not allowed to call this method if fill is not active. "
2585  "If fillComplete has been called, you must first call resumeFill "
2586  "before you may insert indices.");
2587  if (! indicesAreAllocated ()) {
2588  allocateIndices (GlobalIndices, verbose_);
2589  }
2590 
2591  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2592  // If we have a column Map, use it to filter the entries.
2593  if (! colMap_.is_null ()) {
2594  const map_type& colMap = * (this->colMap_);
2595 
2596  LO curOffset = 0;
2597  while (curOffset < numGblColInds) {
2598  // Find a sequence of input indices that are in the column Map
2599  // on the calling process. Doing a sequence at a time,
2600  // instead of one at a time, amortizes some overhead.
2601  LO endOffset = curOffset;
2602  for ( ; endOffset < numGblColInds; ++endOffset) {
2603  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2604  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2605  break; // first entry, in current sequence, not in the column Map
2606  }
2607  }
2608  // curOffset, endOffset: half-exclusive range of indices in
2609  // the column Map on the calling process. If endOffset ==
2610  // curOffset, the range is empty.
2611  const LO numIndInSeq = (endOffset - curOffset);
2612  if (numIndInSeq != 0) {
2613  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2614  numIndInSeq);
2615  }
2616  // Invariant before this line: Either endOffset ==
2617  // numGblColInds, or gblColInds[endOffset] is not in the
2618  // column Map on the calling process.
2619  curOffset = endOffset + 1;
2620  }
2621  }
2622  else {
2623  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2624  gblColInds_av.size ());
2625  }
2626  }
2627 
2628  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2629  void
2631  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2632  const GlobalOrdinal gblColInds[],
2633  const LocalOrdinal numGblColInds)
2634  {
2635  // This creates the std::vector if it doesn't exist yet.
2636  // std::map's operator[] does a lookup each time, so it's better
2637  // to pull nonlocals_[grow] out of the loop.
2638  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2639  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2640  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2641  // order to avoid duplicates. globalAssemble() sorts these
2642  // anyway.
2643  nonlocalRow.push_back (gblColInds[k]);
2644  }
2645  }
2646 
2647  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2648  void
2650  removeLocalIndices (LocalOrdinal lrow)
2651  {
2652  const char tfecfFuncName[] = "removeLocalIndices: ";
2653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2654  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2655  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2656  isStorageOptimized (), std::runtime_error,
2657  "cannot remove indices after optimizeStorage() has been called.");
2658  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2659  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2661  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2662  "Local row " << lrow << " is not in the row Map on the calling process.");
2663  if (! indicesAreAllocated ()) {
2664  allocateIndices (LocalIndices, verbose_);
2665  }
2666 
2667  if (k_numRowEntries_.extent (0) != 0) {
2668  this->k_numRowEntries_(lrow) = 0;
2669  }
2670 
2671  if (debug_) {
2672  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2673  (getNumEntriesInLocalRow (lrow) != 0 ||
2674  ! indicesAreAllocated () ||
2675  ! isLocallyIndexed (), std::logic_error,
2676  "Violated stated post-conditions. Please contact Tpetra team.");
2677  }
2678  }
2679 
2680 
2681  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2682  void
2684  setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2685  const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2686  {
2687  using ProfilingRegion=Details::ProfilingRegion;
2688  ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2689  const char tfecfFuncName[] = "setAllIndices: ";
2690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2691  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2692  "The graph must have a column Map before you may call this method.");
2693  LocalOrdinal numLocalRows = this->getLocalNumRows ();
2694  {
2695  LocalOrdinal rowPtrLen = rowPointers.size();
2696  if(numLocalRows == 0) {
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2698  rowPtrLen != 0 && rowPtrLen != 1,
2699  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2700  }
2701  else {
2702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2703  rowPtrLen != numLocalRows + 1,
2704  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2705  " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2706  }
2707  }
2708 
2709  if(debug_) {
2710  using exec_space = typename local_graph_device_type::execution_space;
2711  int columnsOutOfBounds = 0;
2712  local_ordinal_type numLocalCols = this->getLocalNumCols();
2713  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2714  KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2715  {
2716  if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2717  lOutOfBounds++;
2718  }, columnsOutOfBounds);
2719  int globalColsOutOfBounds= 0;
2720  auto comm = this->getComm();
2721  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2722  Teuchos::outArg (globalColsOutOfBounds));
2723  if (globalColsOutOfBounds)
2724  {
2725  std::string message;
2726  if (columnsOutOfBounds)
2727  {
2728  //Only print message from ranks with the problem
2729  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2730  }
2731  Details::gathervPrint(std::cout, message, *comm);
2732  throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2733  }
2734  }
2735 
2736  if (debug_ && this->isSorted()) {
2737  // Verify that the local indices are actually sorted
2738  int notSorted = 0;
2739  using exec_space = typename local_graph_device_type::execution_space;
2740  using size_type = typename local_graph_device_type::size_type;
2741  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2742  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2743  {
2744  size_type rowBegin = rowPointers(i);
2745  size_type rowEnd = rowPointers(i + 1);
2746  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2747  {
2748  if(columnIndices(j - 1) > columnIndices(j))
2749  {
2750  lNotSorted = 1;
2751  }
2752  }
2753  }, notSorted);
2754  //All-reduce notSorted to avoid rank divergence
2755  int globalNotSorted = 0;
2756  auto comm = this->getComm();
2757  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2758  Teuchos::outArg (globalNotSorted));
2759  if (globalNotSorted)
2760  {
2761  std::string message;
2762  if (notSorted)
2763  {
2764  //Only print message from ranks with the problem
2765  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2766  }
2767  Details::gathervPrint(std::cout, message, *comm);
2768  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2769  }
2770  }
2771 
2772  indicesAreAllocated_ = true;
2773  indicesAreLocal_ = true;
2774  indicesAreSorted_ = true;
2775  noRedundancies_ = true;
2776  lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2777  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2778  setRowPtrs(rowPointers);
2779 
2780  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2781 
2782  // Storage MUST be packed, since the interface doesn't give any
2783  // way to indicate any extra space at the end of each row.
2784  storageStatus_ = Details::STORAGE_1D_PACKED;
2785 
2786  // These normally get cleared out at the end of allocateIndices.
2787  // It makes sense to clear them out here, because at the end of
2788  // this method, the graph is allocated on the calling process.
2789  numAllocForAllRows_ = 0;
2790  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2791 
2792  checkInternalState ();
2793  }
2794 
2795 
2796  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2797  void
2799  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2800  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2801  {
2802  using Kokkos::View;
2803  typedef typename local_graph_device_type::row_map_type row_map_type;
2804  typedef typename row_map_type::array_layout layout_type;
2805  typedef typename row_map_type::non_const_value_type row_offset_type;
2806  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2807  Kokkos::MemoryUnmanaged> input_view_type;
2808  typedef typename row_map_type::non_const_type nc_row_map_type;
2809 
2810  const size_t size = static_cast<size_t> (rowPointers.size ());
2811  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2812  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2813 
2814  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2815 
2816  // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
2817 #ifdef KOKKOS_ENABLE_CXX17
2818  if constexpr (same) { // size_t == row_offset_type
2819  using lexecution_space = typename device_type::execution_space;
2820  Kokkos::deep_copy (lexecution_space(),
2821  ptr_rot,
2822  ptr_in);
2823  }
2824 #else
2825  if (same) { // size_t == row_offset_type
2826  // This compile-time logic ensures that the compiler never sees
2827  // an assignment of View<row_offset_type*, ...> to View<size_t*,
2828  // ...> unless size_t == row_offset_type.
2829  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2830  // DEEP_COPY REVIEW - HOST-TO-DEVICE
2832  Kokkos::Impl::if_c<same,
2833  nc_row_map_type,
2834  input_view_type>::select (ptr_rot, ptr_decoy),
2835  ptr_in);
2836  }
2837 #endif
2838  else { // size_t != row_offset_type
2839  // CudaUvmSpace != HostSpace, so this will be false in that case.
2840  constexpr bool inHostMemory =
2841  std::is_same<typename row_map_type::memory_space,
2842  Kokkos::HostSpace>::value;
2843  if (inHostMemory) {
2844  // Copy (with cast from size_t to row_offset_type, with bounds
2845  // checking if necessary) to ptr_rot.
2846  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2847  }
2848  else { // Copy input row offsets to device first.
2849  //
2850  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2851  // execution space would avoid the double copy.
2852  //
2853  View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2854 
2855  // DEEP_COPY REVIEW - NOT TESTED
2856  Kokkos::deep_copy (ptr_st, ptr_in);
2857  // Copy on device (casting from size_t to row_offset_type,
2858  // with bounds checking if necessary) to ptr_rot. This
2859  // executes in the output View's execution space, which is the
2860  // same as execution_space.
2861  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2862  }
2863  }
2864 
2865  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2866  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2867  setAllIndices (ptr_rot, k_ind);
2868  }
2869 
2870 
2871  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2872  void
2875  {
2876  using Teuchos::Comm;
2877  using Teuchos::outArg;
2878  using Teuchos::RCP;
2879  using Teuchos::rcp;
2880  using Teuchos::REDUCE_MAX;
2881  using Teuchos::REDUCE_MIN;
2882  using Teuchos::reduceAll;
2883  using std::endl;
2884  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2885  using LO = local_ordinal_type;
2886  using GO = global_ordinal_type;
2887  using size_type = typename Teuchos::Array<GO>::size_type;
2888  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2889 
2890  std::unique_ptr<std::string> prefix;
2891  if (verbose_) {
2892  prefix = this->createPrefix("CrsGraph", "globalAssemble");
2893  std::ostringstream os;
2894  os << *prefix << "Start" << endl;
2895  std::cerr << os.str();
2896  }
2897  RCP<const Comm<int> > comm = getComm ();
2898 
2899  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2900  (! isFillActive (), std::runtime_error, "Fill must be active before "
2901  "you may call this method.");
2902 
2903  const size_t myNumNonlocalRows = this->nonlocals_.size ();
2904 
2905  // If no processes have nonlocal rows, then we don't have to do
2906  // anything. Checking this is probably cheaper than constructing
2907  // the Map of nonlocal rows (see below) and noticing that it has
2908  // zero global entries.
2909  {
2910  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2911  int someoneHasNonlocalRows = 0;
2912  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2913  outArg (someoneHasNonlocalRows));
2914  if (someoneHasNonlocalRows == 0) {
2915  if (verbose_) {
2916  std::ostringstream os;
2917  os << *prefix << "Done: No nonlocal rows" << endl;
2918  std::cerr << os.str();
2919  }
2920  return;
2921  }
2922  else if (verbose_) {
2923  std::ostringstream os;
2924  os << *prefix << "At least 1 process has nonlocal rows"
2925  << endl;
2926  std::cerr << os.str();
2927  }
2928  }
2929 
2930  // 1. Create a list of the "nonlocal" rows on each process. this
2931  // requires iterating over nonlocals_, so while we do this,
2932  // deduplicate the entries and get a count for each nonlocal
2933  // row on this process.
2934  // 2. Construct a new row Map corresponding to those rows. This
2935  // Map is likely overlapping. We know that the Map is not
2936  // empty on all processes, because the above all-reduce and
2937  // return exclude that case.
2938 
2939  RCP<const map_type> nonlocalRowMap;
2940  // Keep this for CrsGraph's constructor.
2941  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2942  {
2943  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2944  size_type curPos = 0;
2945  for (auto mapIter = this->nonlocals_.begin ();
2946  mapIter != this->nonlocals_.end ();
2947  ++mapIter, ++curPos) {
2948  myNonlocalGblRows[curPos] = mapIter->first;
2949  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2950  std::sort (gblCols.begin (), gblCols.end ());
2951  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2952  gblCols.erase (vecLast, gblCols.end ());
2953  numEntPerNonlocalRow[curPos] = gblCols.size ();
2954  }
2955 
2956  // Currently, Map requires that its indexBase be the global min
2957  // of all its global indices. Map won't compute this for us, so
2958  // we must do it. If our process has no nonlocal rows, set the
2959  // "min" to the max possible GO value. This ensures that if
2960  // some process has at least one nonlocal row, then it will pick
2961  // that up as the min. We know that at least one process has a
2962  // nonlocal row, since the all-reduce and return at the top of
2963  // this method excluded that case.
2964  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2965  {
2966  auto iter = std::min_element (myNonlocalGblRows.begin (),
2967  myNonlocalGblRows.end ());
2968  if (iter != myNonlocalGblRows.end ()) {
2969  myMinNonlocalGblRow = *iter;
2970  }
2971  }
2972  GO gblMinNonlocalGblRow = 0;
2973  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2974  outArg (gblMinNonlocalGblRow));
2975  const GO indexBase = gblMinNonlocalGblRow;
2976  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2977  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
2978  }
2979 
2980  if (verbose_) {
2981  std::ostringstream os;
2982  os << *prefix << "nonlocalRowMap->getIndexBase()="
2983  << nonlocalRowMap->getIndexBase() << endl;
2984  std::cerr << os.str();
2985  }
2986 
2987  // 3. Use the column indices for each nonlocal row, as stored in
2988  // nonlocals_, to construct a CrsGraph corresponding to
2989  // nonlocal rows. We need, but we have, exact counts of the
2990  // number of entries in each nonlocal row.
2991 
2992  RCP<crs_graph_type> nonlocalGraph =
2993  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2994  {
2995  size_type curPos = 0;
2996  for (auto mapIter = this->nonlocals_.begin ();
2997  mapIter != this->nonlocals_.end ();
2998  ++mapIter, ++curPos) {
2999  const GO gblRow = mapIter->first;
3000  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3001  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3002  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3003  }
3004  }
3005  if (verbose_) {
3006  std::ostringstream os;
3007  os << *prefix << "Built nonlocal graph" << endl;
3008  std::cerr << os.str();
3009  }
3010  // There's no need to fill-complete the nonlocals graph.
3011  // We just use it as a temporary container for the Export.
3012 
3013  // 4. If the original row Map is one to one, then we can Export
3014  // directly from nonlocalGraph into this. Otherwise, we have
3015  // to create a temporary graph with a one-to-one row Map,
3016  // Export into that, then Import from the temporary graph into
3017  // *this.
3018 
3019  auto origRowMap = this->getRowMap ();
3020  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3021 
3022  if (origRowMapIsOneToOne) {
3023  if (verbose_) {
3024  std::ostringstream os;
3025  os << *prefix << "Original row Map is 1-to-1" << endl;
3026  std::cerr << os.str();
3027  }
3028  export_type exportToOrig (nonlocalRowMap, origRowMap);
3029  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3030  // We're done at this point!
3031  }
3032  else {
3033  if (verbose_) {
3034  std::ostringstream os;
3035  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3036  std::cerr << os.str();
3037  }
3038  // If you ask a Map whether it is one to one, it does some
3039  // communication and stashes intermediate results for later use
3040  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3041  // much more then the original cost of calling isOneToOne.
3042  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3043  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3044 
3045  // Create a temporary graph with the one-to-one row Map.
3046  //
3047  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3048  // row, to avoid reallocation during the Export operation.
3049  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3050 
3051  // Export from graph of nonlocals into the temp one-to-one graph.
3052  if (verbose_) {
3053  std::ostringstream os;
3054  os << *prefix << "Export nonlocal graph" << endl;
3055  std::cerr << os.str();
3056  }
3057  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3058 
3059  // We don't need the graph of nonlocals anymore, so get rid of
3060  // it, to keep the memory high-water mark down.
3061  nonlocalGraph = Teuchos::null;
3062 
3063  // Import from the one-to-one graph to the original graph.
3064  import_type importToOrig (oneToOneRowMap, origRowMap);
3065  if (verbose_) {
3066  std::ostringstream os;
3067  os << *prefix << "Import nonlocal graph" << endl;
3068  std::cerr << os.str();
3069  }
3070  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3071  }
3072 
3073  // It's safe now to clear out nonlocals_, since we've already
3074  // committed side effects to *this. The standard idiom for
3075  // clearing a Container like std::map, is to swap it with an empty
3076  // Container and let the swapped Container fall out of scope.
3077  decltype (this->nonlocals_) newNonlocals;
3078  std::swap (this->nonlocals_, newNonlocals);
3079 
3080  checkInternalState ();
3081  if (verbose_) {
3082  std::ostringstream os;
3083  os << *prefix << "Done" << endl;
3084  std::cerr << os.str();
3085  }
3086  }
3087 
3088 
3089  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3090  void
3092  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3093  {
3094  clearGlobalConstants();
3095  if (params != Teuchos::null) this->setParameterList (params);
3096  // either still sorted/merged or initially sorted/merged
3097  indicesAreSorted_ = true;
3098  noRedundancies_ = true;
3099  fillComplete_ = false;
3100  }
3101 
3102 
3103  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3104  void
3106  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3107  {
3108  // If the graph already has domain and range Maps, don't clobber
3109  // them. If it doesn't, use the current row Map for both the
3110  // domain and range Maps.
3111  //
3112  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3113  // column Map, and column indices are inserted which are not in
3114  // the row Map on any process, this will cause troubles. However,
3115  // that is not a common case for most applications that we
3116  // encounter, and checking for it might require more
3117  // communication.
3118  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3119  if (domMap.is_null ()) {
3120  domMap = this->getRowMap ();
3121  }
3122  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3123  if (ranMap.is_null ()) {
3124  ranMap = this->getRowMap ();
3125  }
3126  this->fillComplete (domMap, ranMap, params);
3127  }
3128 
3129 
3130  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3131  void
3133  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3134  const Teuchos::RCP<const map_type>& rangeMap,
3135  const Teuchos::RCP<Teuchos::ParameterList>& params)
3136  {
3137  using std::endl;
3138  const char tfecfFuncName[] = "fillComplete: ";
3139  const bool verbose = verbose_;
3140 
3141  std::unique_ptr<std::string> prefix;
3142  if (verbose) {
3143  prefix = this->createPrefix("CrsGraph", "fillComplete");
3144  std::ostringstream os;
3145  os << *prefix << "Start" << endl;
3146  std::cerr << os.str();
3147  }
3148 
3149  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3150  (! isFillActive () || isFillComplete (), std::runtime_error,
3151  "Graph fill state must be active (isFillActive() "
3152  "must be true) before calling fillComplete().");
3153 
3154  const int numProcs = getComm ()->getSize ();
3155 
3156  //
3157  // Read and set parameters
3158  //
3159 
3160  // Does the caller want to sort remote GIDs (within those owned by
3161  // the same process) in makeColMap()?
3162  if (! params.is_null ()) {
3163  if (params->isParameter ("sort column map ghost gids")) {
3164  sortGhostsAssociatedWithEachProcessor_ =
3165  params->get<bool> ("sort column map ghost gids",
3166  sortGhostsAssociatedWithEachProcessor_);
3167  }
3168  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3169  sortGhostsAssociatedWithEachProcessor_ =
3170  params->get<bool> ("Sort column Map ghost GIDs",
3171  sortGhostsAssociatedWithEachProcessor_);
3172  }
3173  }
3174 
3175  // If true, the caller promises that no process did nonlocal
3176  // changes since the last call to fillComplete.
3177  bool assertNoNonlocalInserts = false;
3178  if (! params.is_null ()) {
3179  assertNoNonlocalInserts =
3180  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3181  }
3182 
3183  //
3184  // Allocate indices, if they haven't already been allocated
3185  //
3186  if (! indicesAreAllocated ()) {
3187  if (hasColMap ()) {
3188  // We have a column Map, so use local indices.
3189  allocateIndices (LocalIndices, verbose);
3190  } else {
3191  // We don't have a column Map, so use global indices.
3192  allocateIndices (GlobalIndices, verbose);
3193  }
3194  }
3195 
3196  //
3197  // Do global assembly, if requested and if the communicator
3198  // contains more than one process.
3199  //
3200  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3201  if (mayNeedGlobalAssemble) {
3202  // This first checks if we need to do global assembly.
3203  // The check costs a single all-reduce.
3204  globalAssemble ();
3205  }
3206  else {
3207  const size_t numNonlocals = nonlocals_.size();
3208  if (verbose) {
3209  std::ostringstream os;
3210  os << *prefix << "Do not need to call globalAssemble; "
3211  "assertNoNonlocalInserts="
3212  << (assertNoNonlocalInserts ? "true" : "false")
3213  << "numProcs=" << numProcs
3214  << ", nonlocals_.size()=" << numNonlocals << endl;
3215  std::cerr << os.str();
3216  }
3217  const int lclNeededGlobalAssemble =
3218  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3219  if (lclNeededGlobalAssemble != 0 && verbose) {
3220  std::ostringstream os;
3221  os << *prefix;
3222  Details::Impl::verbosePrintMap(
3223  os, nonlocals_.begin(), nonlocals_.end(),
3224  nonlocals_.size(), "nonlocals_");
3225  std::cerr << os.str() << endl;
3226  }
3227 
3228  if (debug_) {
3229  auto map = this->getMap();
3230  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3231  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3232  if (! comm.is_null()) {
3233  using Teuchos::REDUCE_MAX;
3234  using Teuchos::reduceAll;
3235  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3236  Teuchos::outArg(gblNeededGlobalAssemble));
3237  }
3238  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3239  (gblNeededGlobalAssemble != 0, std::runtime_error,
3240  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3241  "least one process in the CrsGraph's communicator. This "
3242  "means either that you incorrectly set the "
3243  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3244  "or that you inserted invalid entries. "
3245  "Rerun with the environment variable TPETRA_VERBOSE="
3246  "CrsGraph set to see the entries of nonlocals_ on every "
3247  "MPI process (WARNING: lots of output).");
3248  }
3249  else {
3250  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3251  (lclNeededGlobalAssemble != 0, std::runtime_error,
3252  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3253  "calling process. This means either that you incorrectly "
3254  "set the \"No Nonlocal Changes\" fillComplete parameter "
3255  "to true, or that you inserted invalid entries. "
3256  "Rerun with the environment "
3257  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3258  "of nonlocals_ on every MPI process (WARNING: lots of "
3259  "output).");
3260  }
3261  }
3262 
3263  // Set domain and range Map. This may clear the Import / Export
3264  // objects if the new Maps differ from any old ones.
3265  setDomainRangeMaps (domainMap, rangeMap);
3266 
3267  // If the graph does not already have a column Map (either from
3268  // the user constructor calling the version of the constructor
3269  // that takes a column Map, or from a previous fillComplete call),
3270  // then create it.
3271  Teuchos::Array<int> remotePIDs (0);
3272  const bool mustBuildColMap = ! this->hasColMap ();
3273  if (mustBuildColMap) {
3274  this->makeColMap (remotePIDs); // resized on output
3275  }
3276 
3277  // Make indices local, if they aren't already.
3278  // The method doesn't do any work if the indices are already local.
3279  const std::pair<size_t, std::string> makeIndicesLocalResult =
3280  this->makeIndicesLocal(verbose);
3281 
3282  if (debug_) {
3283  using Details::gathervPrint;
3284  using Teuchos::RCP;
3285  using Teuchos::REDUCE_MIN;
3286  using Teuchos::reduceAll;
3287  using Teuchos::outArg;
3288 
3289  RCP<const map_type> map = this->getMap ();
3290  RCP<const Teuchos::Comm<int> > comm;
3291  if (! map.is_null ()) {
3292  comm = map->getComm ();
3293  }
3294  if (comm.is_null ()) {
3295  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3296  (makeIndicesLocalResult.first != 0, std::runtime_error,
3297  makeIndicesLocalResult.second);
3298  }
3299  else {
3300  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3301  int gblSuccess = 0; // output argument
3302  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3303  if (gblSuccess != 1) {
3304  std::ostringstream os;
3305  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3307  (true, std::runtime_error, os.str ());
3308  }
3309  }
3310  }
3311  else {
3312  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3313  // the error state to makeImportExport or
3314  // computeGlobalConstants, which may do all-reduces and thus may
3315  // have the opportunity to communicate that error state.
3316  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3317  (makeIndicesLocalResult.first != 0, std::runtime_error,
3318  makeIndicesLocalResult.second);
3319  }
3320 
3321  // If this process has no indices, then CrsGraph considers it
3322  // already trivially sorted and merged. Thus, this method need
3323  // not be called on all processes in the row Map's communicator.
3324  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3325 
3326  // Make Import and Export objects, if they haven't been made
3327  // already. If we made a column Map above, reuse information from
3328  // that process to avoid communiation in the Import setup.
3329  this->makeImportExport (remotePIDs, mustBuildColMap);
3330 
3331  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3332  this->fillLocalGraph (params);
3333 
3334  const bool callComputeGlobalConstants = params.get () == nullptr ||
3335  params->get ("compute global constants", true);
3336  if (callComputeGlobalConstants) {
3337  this->computeGlobalConstants ();
3338  }
3339  else {
3340  this->computeLocalConstants ();
3341  }
3342  this->fillComplete_ = true;
3343  this->checkInternalState ();
3344 
3345  if (verbose) {
3346  std::ostringstream os;
3347  os << *prefix << "Done" << endl;
3348  std::cerr << os.str();
3349  }
3350  }
3351 
3352 
3353  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3354  void
3356  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3357  const Teuchos::RCP<const map_type>& rangeMap,
3358  const Teuchos::RCP<const import_type>& importer,
3359  const Teuchos::RCP<const export_type>& exporter,
3360  const Teuchos::RCP<Teuchos::ParameterList>& params)
3361  {
3362  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3363 #ifdef HAVE_TPETRA_MMM_TIMINGS
3364  std::string label;
3365  if(!params.is_null())
3366  label = params->get("Timer Label",label);
3367  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3368  using Teuchos::TimeMonitor;
3369  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3370 #endif
3371 
3372 
3373  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3374  domainMap.is_null () || rangeMap.is_null (),
3375  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3376  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3377  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3378  "call this method unless the graph has a column Map.");
3379  auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedHost().extent (0);
3380  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3381  getLocalNumRows () > 0 && rowPtrsUnpackedLength == 0,
3382  std::runtime_error, "The calling process has getLocalNumRows() = "
3383  << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3384  "been set.");
3385  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3386  static_cast<size_t> (rowPtrsUnpackedLength) != getLocalNumRows () + 1,
3387  std::runtime_error, "The row offsets array has length " <<
3388  rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " <<
3389  (getLocalNumRows () + 1) << ".");
3390 
3391  // Note: We don't need to do the following things which are normally done in fillComplete:
3392  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3393 
3394  // Constants from allocateIndices
3395  //
3396  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3397  // away once the graph is allocated. expertStaticFillComplete
3398  // either presumes that the graph is allocated, or "allocates" it.
3399  //
3400  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3401  // version of CrsGraph is to allocate in the constructor, not
3402  // lazily on first insert. That will make both
3403  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3404  numAllocForAllRows_ = 0;
3405  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3406  indicesAreAllocated_ = true;
3407 
3408  // Constants from makeIndicesLocal
3409  //
3410  // The graph has a column Map, so its indices had better be local.
3411  indicesAreLocal_ = true;
3412  indicesAreGlobal_ = false;
3413 
3414  // set domain/range map: may clear the import/export objects
3415 #ifdef HAVE_TPETRA_MMM_TIMINGS
3416  MM = Teuchos::null;
3417  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3418 #endif
3419  setDomainRangeMaps (domainMap, rangeMap);
3420 
3421  // Presume the user sorted and merged the arrays first
3422  indicesAreSorted_ = true;
3423  noRedundancies_ = true;
3424 
3425  // makeImportExport won't create a new importer/exporter if I set one here first.
3426 #ifdef HAVE_TPETRA_MMM_TIMINGS
3427  MM = Teuchos::null;
3428  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3429 #endif
3430 
3431  importer_ = Teuchos::null;
3432  exporter_ = Teuchos::null;
3433  if (importer != Teuchos::null) {
3434  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3435  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3436  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3437  std::invalid_argument,": importer does not match matrix maps.");
3438  importer_ = importer;
3439 
3440  }
3441 
3442 #ifdef HAVE_TPETRA_MMM_TIMINGS
3443  MM = Teuchos::null;
3444  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3445 #endif
3446 
3447  if (exporter != Teuchos::null) {
3448  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3449  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3450  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3451  std::invalid_argument,": exporter does not match matrix maps.");
3452  exporter_ = exporter;
3453  }
3454 
3455 #ifdef HAVE_TPETRA_MMM_TIMINGS
3456  MM = Teuchos::null;
3457  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3458 #endif
3459  Teuchos::Array<int> remotePIDs (0); // unused output argument
3460  this->makeImportExport (remotePIDs, false);
3461 
3462 #ifdef HAVE_TPETRA_MMM_TIMINGS
3463  MM = Teuchos::null;
3464  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3465 #endif
3466  this->fillLocalGraph (params);
3467 
3468  const bool callComputeGlobalConstants = params.get () == nullptr ||
3469  params->get ("compute global constants", true);
3470 
3471  if (callComputeGlobalConstants) {
3472 #ifdef HAVE_TPETRA_MMM_TIMINGS
3473  MM = Teuchos::null;
3474  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3475 #endif // HAVE_TPETRA_MMM_TIMINGS
3476  this->computeGlobalConstants ();
3477  }
3478  else {
3479 #ifdef HAVE_TPETRA_MMM_TIMINGS
3480  MM = Teuchos::null;
3481  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3482 #endif // HAVE_TPETRA_MMM_TIMINGS
3483  this->computeLocalConstants ();
3484  }
3485 
3486  fillComplete_ = true;
3487 
3488 #ifdef HAVE_TPETRA_MMM_TIMINGS
3489  MM = Teuchos::null;
3490  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3491 #endif
3492  checkInternalState ();
3493  }
3494 
3495 
3496  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3497  void
3499  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3500  {
3502  typedef typename local_graph_device_type::row_map_type row_map_type;
3503  typedef typename row_map_type::non_const_type non_const_row_map_type;
3504  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3505  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3506  "expertStaticFillComplete): ";
3507  const size_t lclNumRows = this->getLocalNumRows ();
3508 
3509  // This method's goal is to fill in the two arrays (compressed
3510  // sparse row format) that define the sparse graph's structure.
3511 
3512  bool requestOptimizedStorage = true;
3513  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3514  requestOptimizedStorage = false;
3515  }
3516 
3517  // The graph's column indices are currently stored in a 1-D
3518  // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3519  // in k_lclInds1D_.
3520 
3521  if (debug_) {
3522  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3523  // The graph's array of row offsets must already be allocated.
3524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3525  (rowPtrsUnpacked.extent (0) == 0, std::logic_error,
3526  "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3528  (rowPtrsUnpacked.extent (0) != lclNumRows + 1, std::logic_error,
3529  "rowPtrsUnpacked_host_.extent(0) = "
3530  << rowPtrsUnpacked.extent (0) << " != (lclNumRows + 1) = "
3531  << (lclNumRows + 1) << ".");
3532  const size_t numOffsets = rowPtrsUnpacked.extent (0);
3533  const auto valToCheck = rowPtrsUnpacked(numOffsets-1);
3534  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3535  (numOffsets != 0 &&
3536  lclIndsUnpacked_wdv.extent (0) != valToCheck,
3537  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3538  " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3539  << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck
3540  << ".");
3541  }
3542 
3543  size_t allocSize = 0;
3544  try {
3545  allocSize = this->getLocalAllocationSize ();
3546  }
3547  catch (std::logic_error& e) {
3548  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3549  (true, std::logic_error, "getLocalAllocationSize threw "
3550  "std::logic_error: " << e.what ());
3551  }
3552  catch (std::runtime_error& e) {
3553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3554  (true, std::runtime_error, "getLocalAllocationSize threw "
3555  "std::runtime_error: " << e.what ());
3556  }
3557  catch (std::exception& e) {
3558  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3559  (true, std::runtime_error, "getLocalAllocationSize threw "
3560  "std::exception: " << e.what ());
3561  }
3562  catch (...) {
3563  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3564  (true, std::runtime_error, "getLocalAllocationSize threw "
3565  "an exception not a subclass of std::exception.");
3566  }
3567 
3568  if (this->getLocalNumEntries () != allocSize) {
3569  // Use the nonconst version of row_map_type for ptr_d, because
3570  // the latter is const and we need to modify ptr_d here.
3571  non_const_row_map_type ptr_d;
3572  row_map_type ptr_d_const;
3573 
3574  // The graph's current 1-D storage is "unpacked." This means
3575  // the row offsets may differ from what the final row offsets
3576  // should be. This could happen, for example, if the user set
3577  // an upper bound on the number of entries in each row, but
3578  // didn't fill all those entries.
3579 
3580  if (debug_) {
3581  auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3582  if (rowPtrsUnpacked.extent (0) != 0) {
3583  const size_t numOffsets =
3584  static_cast<size_t> (rowPtrsUnpacked.extent (0));
3585  const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3587  (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3588  std::logic_error, "(Unpacked branch) Before allocating "
3589  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3590  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3591  << lclIndsUnpacked_wdv.extent (0) << ".");
3592  }
3593  }
3594 
3595  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3596  // array of valid entry counts per row (k_numRowEntries_).
3597 
3598  // Total number of entries in the matrix on the calling
3599  // process. We will compute this in the loop below. It's
3600  // cheap to compute and useful as a sanity check.
3601  size_t lclTotalNumEntries = 0;
3602  {
3603  // Allocate the packed row offsets array.
3604  ptr_d =
3605  non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3606  ptr_d_const = ptr_d;
3607 
3608  // It's ok that k_numRowEntries_ is a host View; the
3609  // function can handle this.
3610  typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3611  if (debug_) {
3612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3613  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3614  std::logic_error, "(Unpacked branch) "
3615  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3616  << " != getLocalNumRows()=" << lclNumRows << "");
3617  }
3618 
3619  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3620 
3621  if (debug_) {
3622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3623  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3624  std::logic_error, "(Unpacked branch) After allocating "
3625  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3626  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3627  const auto valToCheck =
3628  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3629  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3630  (valToCheck != lclTotalNumEntries, std::logic_error,
3631  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3632  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3633  << ") = " << valToCheck << " != total number of entries "
3634  "on the calling process = " << lclTotalNumEntries
3635  << ".");
3636  }
3637  }
3638 
3639  // Allocate the array of packed column indices.
3640  lclinds_1d_type ind_d =
3641  lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3642 
3643  // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3644  // them, using the packed row offsets array ptr_d that we
3645  // created above.
3646  //
3647  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3648  // CrsMatrix?), we need to keep around the unpacked row
3649  // offsets and column indices.
3650 
3651  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3652  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3653  typedef pack_functor<
3654  typename local_graph_device_type::entries_type::non_const_type,
3655  typename local_inds_dualv_type::t_dev::const_type,
3656  row_map_type,
3657  typename local_graph_device_type::row_map_type> inds_packer_type;
3658  inds_packer_type f (ind_d,
3659  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3660  ptr_d, this->getRowPtrsUnpackedDevice());
3661  {
3662  typedef typename decltype (ind_d)::execution_space exec_space;
3663  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3664  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3665  }
3666 
3667  if (debug_) {
3668  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3669  (ptr_d.extent (0) == 0, std::logic_error,
3670  "(\"Optimize Storage\"=true branch) After packing, "
3671  "ptr_d.extent(0)=0.");
3672  if (ptr_d.extent (0) != 0) {
3673  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3674  const auto valToCheck =
3675  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3676  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3677  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3678  std::logic_error, "(\"Optimize Storage\"=true branch) "
3679  "After packing, ptr_d(" << (numOffsets-1) << ")="
3680  << valToCheck << " != ind_d.extent(0)="
3681  << ind_d.extent(0) << ".");
3682  }
3683  }
3684  // Build the local graph.
3685  if (requestOptimizedStorage)
3686  setRowPtrs(ptr_d_const);
3687  else
3688  setRowPtrsPacked(ptr_d_const);
3689  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3690  }
3691  else { // We don't have to pack, so just set the pointers.
3692  //Set both packed and unpacked rowptrs to this
3693  this->setRowPtrs(rowPtrsUnpacked_dev_);
3694  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3695 
3696  if (debug_) {
3697  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3698  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3699  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3700  (rowPtrsPacked_dev.extent (0) == 0, std::logic_error,
3701  "(\"Optimize Storage\"=false branch) "
3702  "rowPtrsPacked_dev_.extent(0) = 0.");
3703  if (rowPtrsPacked_dev.extent (0) != 0) {
3704  const size_t numOffsets =
3705  static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3706  const size_t valToCheck =
3707  rowPtrsPacked_host(numOffsets - 1);
3708  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3709  (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3710  std::logic_error, "(\"Optimize Storage\"=false branch) "
3711  "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3712  << valToCheck
3713  << " != lclIndsPacked_wdv.extent(0)="
3714  << lclIndsPacked_wdv.extent (0) << ".");
3715  }
3716  }
3717  }
3718 
3719  if (debug_) {
3720  auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3721  auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3723  (static_cast<size_t> (rowPtrsPacked_dev.extent (0)) != lclNumRows + 1,
3724  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3725  rowPtrsPacked_dev.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3726  << ".");
3727  if (rowPtrsPacked_dev.extent (0) != 0) {
3728  const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3729  const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3730  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3731  (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3732  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3733  << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3734  << lclIndsPacked_wdv.extent (0) << ".");
3735  }
3736  }
3737 
3738  if (requestOptimizedStorage) {
3739  // With optimized storage, we don't need to store
3740  // the array of row entry counts.
3741 
3742  // Free graph data structures that are only needed for
3743  // unpacked 1-D storage.
3744  k_numRowEntries_ = num_row_entries_type ();
3745 
3746  // Keep the new 1-D packed allocations.
3747  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3748 
3749  storageStatus_ = Details::STORAGE_1D_PACKED;
3750  }
3751 
3752  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3753  }
3754 
3755  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3756  void
3758  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3759  {
3760  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3761  //
3762  // FIXME (mfh 18 Aug 2014) This will break if the calling process
3763  // has no entries, because in that case, currently it is neither
3764  // locally nor globally indexed. This will change once we get rid
3765  // of lazy allocation (so that the constructor allocates indices
3766  // and therefore commits to local vs. global).
3767  const char tfecfFuncName[] = "replaceColMap: ";
3768  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3769  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3770  "Requires matching maps and non-static graph.");
3771  colMap_ = newColMap;
3772  }
3773 
3774  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3775  void
3777  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3778  const Teuchos::RCP<const import_type>& newImport,
3779  const bool sortIndicesInEachRow)
3780  {
3781  using Teuchos::REDUCE_MIN;
3782  using Teuchos::reduceAll;
3783  using Teuchos::RCP;
3784  typedef GlobalOrdinal GO;
3785  typedef LocalOrdinal LO;
3786  typedef typename local_inds_dualv_type::t_host col_inds_type;
3787  const char tfecfFuncName[] = "reindexColumns: ";
3788 
3789  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3790  isFillComplete (), std::runtime_error, "The graph is fill complete "
3791  "(isFillComplete() returns true). You must call resumeFill() before "
3792  "you may call this method.");
3793 
3794  // mfh 19 Aug 2014: This method does NOT redistribute data; it
3795  // doesn't claim to do the work of an Import or Export. This
3796  // means that for all processes, the calling process MUST own all
3797  // column indices, in both the old column Map (if it exists) and
3798  // the new column Map. We check this via an all-reduce.
3799  //
3800  // Some processes may be globally indexed, others may be locally
3801  // indexed, and others (that have no graph entries) may be
3802  // neither. This method will NOT change the graph's current
3803  // state. If it's locally indexed, it will stay that way, and
3804  // vice versa. It would easy to add an option to convert indices
3805  // from global to local, so as to save a global-to-local
3806  // conversion pass. However, we don't do this here. The intended
3807  // typical use case is that the graph already has a column Map and
3808  // is locally indexed, and this is the case for which we optimize.
3809 
3810  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3811 
3812  // Attempt to convert indices to the new column Map's version of
3813  // local. This will fail if on the calling process, the graph has
3814  // indices that are not on that process in the new column Map.
3815  // After the local conversion attempt, we will do an all-reduce to
3816  // see if any processes failed.
3817 
3818  // If this is false, then either the graph contains a column index
3819  // which is invalid in the CURRENT column Map, or the graph is
3820  // locally indexed but currently has no column Map. In either
3821  // case, there is no way to convert the current local indices into
3822  // global indices, so that we can convert them into the new column
3823  // Map's local indices. It's possible for this to be true on some
3824  // processes but not others, due to replaceColMap.
3825  bool allCurColIndsValid = true;
3826  // On the calling process, are all valid current column indices
3827  // also in the new column Map on the calling process? In other
3828  // words, does local reindexing suffice, or should the user have
3829  // done an Import or Export instead?
3830  bool localSuffices = true;
3831 
3832  // Final arrays for the local indices. We will allocate exactly
3833  // one of these ONLY if the graph is locally indexed on the
3834  // calling process, and ONLY if the graph has one or more entries
3835  // (is not empty) on the calling process. In that case, we
3836  // allocate the first (1-D storage) if the graph has a static
3837  // profile, else we allocate the second (2-D storage).
3838  col_inds_type newLclInds1D;
3839  auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3840 
3841  // If indices aren't allocated, that means the calling process
3842  // owns no entries in the graph. Thus, there is nothing to
3843  // convert, and it trivially succeeds locally.
3844  if (indicesAreAllocated ()) {
3845  if (isLocallyIndexed ()) {
3846  if (hasColMap ()) { // locally indexed, and currently has a column Map
3847  const map_type& oldColMap = * (getColMap ());
3848  // Allocate storage for the new local indices.
3849  const size_t allocSize = this->getLocalAllocationSize ();
3850  newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3851  allocSize);
3852  // Attempt to convert the new indices locally.
3853  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3854  const RowInfo rowInfo = this->getRowInfo (lclRow);
3855  const size_t beg = rowInfo.offset1D;
3856  const size_t end = beg + rowInfo.numEntries;
3857  for (size_t k = beg; k < end; ++k) {
3858  const LO oldLclCol = oldLclInds1D(k);
3859  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3860  allCurColIndsValid = false;
3861  break; // Stop at the first invalid index
3862  }
3863  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3864 
3865  // The above conversion MUST succeed. Otherwise, the
3866  // current local index is invalid, which means that
3867  // the graph was constructed incorrectly.
3868  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3869  allCurColIndsValid = false;
3870  break; // Stop at the first invalid index
3871  }
3872  else {
3873  const LO newLclCol = newColMap->getLocalElement (gblCol);
3874  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3875  localSuffices = false;
3876  break; // Stop at the first invalid index
3877  }
3878  newLclInds1D(k) = newLclCol;
3879  }
3880  } // for each entry in the current row
3881  } // for each locally owned row
3882  }
3883  else { // locally indexed, but no column Map
3884  // This case is only possible if replaceColMap() was called
3885  // with a null argument on the calling process. It's
3886  // possible, but it means that this method can't possibly
3887  // succeed, since we have no way of knowing how to convert
3888  // the current local indices to global indices.
3889  allCurColIndsValid = false;
3890  }
3891  }
3892  else { // globally indexed
3893  // If the graph is globally indexed, we don't need to save
3894  // local indices, but we _do_ need to know whether the current
3895  // global indices are valid in the new column Map. We may
3896  // need to do a getRemoteIndexList call to find this out.
3897  //
3898  // In this case, it doesn't matter whether the graph currently
3899  // has a column Map. We don't need the old column Map to
3900  // convert from global indices to the _new_ column Map's local
3901  // indices. Furthermore, we can use the same code, whether
3902  // the graph is static or dynamic profile.
3903 
3904  // Test whether the current global indices are in the new
3905  // column Map on the calling process.
3906  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3907  const RowInfo rowInfo = this->getRowInfo (lclRow);
3908  auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3909  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3910  const GO gblCol = oldGblRowView(k);
3911  if (! newColMap->isNodeGlobalElement (gblCol)) {
3912  localSuffices = false;
3913  break; // Stop at the first invalid index
3914  }
3915  } // for each entry in the current row
3916  } // for each locally owned row
3917  } // locally or globally indexed
3918  } // whether indices are allocated
3919 
3920  // Do an all-reduce to check both possible error conditions.
3921  int lclSuccess[2];
3922  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3923  lclSuccess[1] = localSuffices ? 1 : 0;
3924  int gblSuccess[2];
3925  gblSuccess[0] = 0;
3926  gblSuccess[1] = 0;
3927  RCP<const Teuchos::Comm<int> > comm =
3928  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3929  if (! comm.is_null ()) {
3930  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3931  }
3932 
3933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3934  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3935  " The most likely reason is that the graph is locally indexed, but the "
3936  "column Map is missing (null) on some processes, due to a previous call "
3937  "to replaceColMap().");
3938 
3939  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3940  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3941  "contains column indices that are in the old column Map, but not in the "
3942  "new column Map (on that process). This method does NOT redistribute "
3943  "data; it does not claim to do the work of an Import or Export operation."
3944  " This means that for all processess, the calling process MUST own all "
3945  "column indices, in both the old column Map and the new column Map. In "
3946  "this case, you will need to do an Import or Export operation to "
3947  "redistribute data.");
3948 
3949  // Commit the results.
3950  if (isLocallyIndexed ()) {
3951  { // scope the device view; sortAndMergeAllIndices needs host
3952  typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3953  Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3954  Kokkos::WithoutInitializing),
3955  newLclInds1D.extent(0));
3956  Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3957  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3958  }
3959 
3960  // We've reindexed, so we don't know if the indices are sorted.
3961  //
3962  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3963  // since we're already going through all the indices above. We
3964  // could also sort each row in place; that way, we would only
3965  // have to make one pass over the rows.
3966  indicesAreSorted_ = false;
3967  if (sortIndicesInEachRow) {
3968  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3969  // order to call this method.
3970  //
3971  // FIXME (mfh 17 Sep 2014) This violates the strong exception
3972  // guarantee. It would be better to sort the new index arrays
3973  // before committing them.
3974  const bool sorted = false; // need to resort
3975  const bool merged = true; // no need to merge, since no dups
3976  this->sortAndMergeAllIndices (sorted, merged);
3977  }
3978  }
3979  colMap_ = newColMap;
3980 
3981  if (newImport.is_null ()) {
3982  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3983  // check whether the input Import is null on any process.
3984  //
3985  // If the domain Map hasn't been set yet, we can't compute a new
3986  // Import object. Leave it what it is; it should be null, but
3987  // it doesn't matter. If the domain Map _has_ been set, then
3988  // compute a new Import object if necessary.
3989  if (! domainMap_.is_null ()) {
3990  if (! domainMap_->isSameAs (* newColMap)) {
3991  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
3992  } else {
3993  importer_ = Teuchos::null; // don't need an Import
3994  }
3995  }
3996  } else {
3997  // The caller gave us an Import object. Assume that it's valid.
3998  importer_ = newImport;
3999  }
4000  }
4001 
4002  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4003  void
4005  replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4006  {
4007  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4008  TEUCHOS_TEST_FOR_EXCEPTION(
4009  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4010  "this method unless the graph already has a column Map.");
4011  TEUCHOS_TEST_FOR_EXCEPTION(
4012  newDomainMap.is_null (), std::invalid_argument,
4013  prefix << "The new domain Map must be nonnull.");
4014 
4015  // Create a new importer, if needed
4016  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4017  if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4018  newImporter = rcp(new import_type(newDomainMap, colMap_));
4019  }
4020  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4021  }
4022 
4023  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4024  void
4026  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4027  const Teuchos::RCP<const import_type>& newImporter)
4028  {
4029  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4030  TEUCHOS_TEST_FOR_EXCEPTION(
4031  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4032  "this method unless the graph already has a column Map.");
4033  TEUCHOS_TEST_FOR_EXCEPTION(
4034  newDomainMap.is_null (), std::invalid_argument,
4035  prefix << "The new domain Map must be nonnull.");
4036 
4037  if (debug_) {
4038  if (newImporter.is_null ()) {
4039  // It's not a good idea to put expensive operations in a macro
4040  // clause, even if they are side effect - free, because macros
4041  // don't promise that they won't evaluate their arguments more
4042  // than once. It's polite for them to do so, but not required.
4043  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4044  TEUCHOS_TEST_FOR_EXCEPTION
4045  (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4046  "then the new domain Map must be the same as the current column Map.");
4047  }
4048  else {
4049  const bool colSameAsTgt =
4050  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4051  const bool newDomSameAsSrc =
4052  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4053  TEUCHOS_TEST_FOR_EXCEPTION
4054  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4055  "new Import is nonnull, then the current column Map must be the same "
4056  "as the new Import's target Map, and the new domain Map must be the "
4057  "same as the new Import's source Map.");
4058  }
4059  }
4060 
4061  domainMap_ = newDomainMap;
4062  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4063  }
4064 
4065  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4066  void
4068  replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4069  {
4070  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4071  TEUCHOS_TEST_FOR_EXCEPTION(
4072  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4073  "this method unless the graph already has a row Map.");
4074  TEUCHOS_TEST_FOR_EXCEPTION(
4075  newRangeMap.is_null (), std::invalid_argument,
4076  prefix << "The new range Map must be nonnull.");
4077 
4078  // Create a new exporter, if needed
4079  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4080  if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4081  newExporter = rcp(new export_type(rowMap_, newRangeMap));
4082  }
4083  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4084  }
4085 
4086  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4087  void
4089  replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4090  const Teuchos::RCP<const export_type>& newExporter)
4091  {
4092  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4093  TEUCHOS_TEST_FOR_EXCEPTION(
4094  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4095  "this method unless the graph already has a column Map.");
4096  TEUCHOS_TEST_FOR_EXCEPTION(
4097  newRangeMap.is_null (), std::invalid_argument,
4098  prefix << "The new domain Map must be nonnull.");
4099 
4100  if (debug_) {
4101  if (newExporter.is_null ()) {
4102  // It's not a good idea to put expensive operations in a macro
4103  // clause, even if they are side effect - free, because macros
4104  // don't promise that they won't evaluate their arguments more
4105  // than once. It's polite for them to do so, but not required.
4106  const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4107  TEUCHOS_TEST_FOR_EXCEPTION
4108  (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4109  "then the new range Map must be the same as the current row Map.");
4110  }
4111  else {
4112  const bool newRangeSameAsTgt =
4113  newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4114  const bool rowSameAsSrc =
4115  rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4116  TEUCHOS_TEST_FOR_EXCEPTION
4117  (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4118  "new Export is nonnull, then the current row Map must be the same "
4119  "as the new Export's source Map, and the new range Map must be the "
4120  "same as the new Export's target Map.");
4121  }
4122  }
4123 
4124  rangeMap_ = newRangeMap;
4125  exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4126  }
4127 
4128 
4129  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4133  {
4134  return local_graph_device_type(
4135  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4136  this->getRowPtrsPackedDevice());
4137  }
4138 
4139  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4142  getLocalGraphHost () const
4143  {
4144  return local_graph_host_type(
4145  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4146  this->getRowPtrsPackedHost());
4147  }
4148 
4149  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4150  void
4153  {
4154  using ::Tpetra::Details::ProfilingRegion;
4155  using Teuchos::ArrayView;
4156  using Teuchos::outArg;
4157  using Teuchos::reduceAll;
4158  typedef global_size_t GST;
4159 
4160  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4161 
4162  this->computeLocalConstants ();
4163 
4164  // Compute global constants from local constants. Processes that
4165  // already have local constants still participate in the
4166  // all-reduces, using their previously computed values.
4167  if (! this->haveGlobalConstants_) {
4168  const Teuchos::Comm<int>& comm = * (this->getComm ());
4169  // Promote all the nodeNum* and nodeMaxNum* quantities from
4170  // size_t to global_size_t, when doing the all-reduces for
4171  // globalNum* / globalMaxNum* results.
4172  //
4173  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4174  // this in two all-reduces (one for the sum and the other for
4175  // the max), or use a custom MPI_Op that combines the sum and
4176  // the max. The latter might even be slower than two
4177  // all-reduces on modern network hardware. It would also be a
4178  // good idea to use nonblocking all-reduces (MPI 3), so that we
4179  // don't have to wait around for the first one to finish before
4180  // starting the second one.
4181  GST lcl, gbl;
4182  lcl = static_cast<GST> (this->getLocalNumEntries ());
4183 
4184  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4185  this->globalNumEntries_ = gbl;
4186 
4187  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4188  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4189  outArg (this->globalMaxNumRowEntries_));
4190  this->haveGlobalConstants_ = true;
4191  }
4192  }
4193 
4194 
4195  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4196  void
4199  {
4200  using ::Tpetra::Details::ProfilingRegion;
4201 
4202  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4203  if (this->haveLocalConstants_) {
4204  return;
4205  }
4206 
4207  // Reset local properties
4208  this->nodeMaxNumRowEntries_ =
4209  Teuchos::OrdinalTraits<size_t>::invalid();
4210 
4211  using LO = local_ordinal_type;
4212 
4213  auto ptr = this->getRowPtrsPackedDevice();
4214  const LO lclNumRows = ptr.extent(0) == 0 ?
4215  static_cast<LO> (0) :
4216  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4217 
4218  const LO lclMaxNumRowEnt =
4219  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4220  ptr, lclNumRows);
4221  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4222  this->haveLocalConstants_ = true;
4223  }
4224 
4225 
4226  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4227  std::pair<size_t, std::string>
4229  makeIndicesLocal (const bool verbose)
4230  {
4232  using Teuchos::arcp;
4233  using Teuchos::Array;
4234  using std::endl;
4235  typedef LocalOrdinal LO;
4236  typedef GlobalOrdinal GO;
4237  typedef device_type DT;
4238  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4239  typedef typename num_row_entries_type::non_const_value_type num_ent_type;
4240  const char tfecfFuncName[] = "makeIndicesLocal: ";
4241  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4242 
4243  std::unique_ptr<std::string> prefix;
4244  if (verbose) {
4245  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4246  std::ostringstream os;
4247  os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4248  std::cerr << os.str();
4249  }
4250 
4251  // These are somewhat global properties, so it's safe to have
4252  // exception checks for them, rather than returning an error code.
4253  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4254  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4255  "column Map yet. This method should never be called in that case. "
4256  "Please report this bug to the Tpetra developers.");
4257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4258  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4259  "that it has a column Map, because hasColMap() returns true. However, "
4260  "the result of getColMap() is null. This should never happen. Please "
4261  "report this bug to the Tpetra developers.");
4262 
4263  // Return value 1: The number of column indices (counting
4264  // duplicates) that could not be converted to local indices,
4265  // because they were not in the column Map on the calling process.
4266  size_t lclNumErrs = 0;
4267  std::ostringstream errStrm; // for return value 2 (error string)
4268 
4269  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4270  const map_type& colMap = * (this->getColMap ());
4271 
4272  if (this->isGloballyIndexed () && lclNumRows != 0) {
4273  // This is a host-accessible View.
4274  typename num_row_entries_type::const_type h_numRowEnt =
4275  this->k_numRowEntries_;
4276 
4277  auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
4278 
4279  // Allocate space for local indices.
4280  if (rowPtrsUnpacked_host.extent(0) == 0) {
4281  errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4282  "happen here. Please report this bug to the Tpetra developers."
4283  << endl;
4284  // Need to return early.
4285  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4286  errStrm.str ());
4287  }
4288  const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4289 
4290  // mfh 17 Dec 2016: We don't need initial zero-fill of
4291  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4292  // AllowPadding would only help for aligned access (e.g.,
4293  // for vectorization) if we also were to pad each row to the
4294  // same alignment, so we'll skip AllowPadding for now.
4295 
4296  // using Kokkos::AllowPadding;
4297  using Kokkos::view_alloc;
4298  using Kokkos::WithoutInitializing;
4299 
4300  // When giving the label as an argument to
4301  // Kokkos::view_alloc, the label must be a string and not a
4302  // char*, else the code won't compile. This is because
4303  // view_alloc also allows a raw pointer as its first
4304  // argument. See
4305  // https://github.com/kokkos/kokkos/issues/434. This is a
4306  // large allocation typically, so the overhead of creating
4307  // an std::string is minor.
4308  const std::string label ("Tpetra::CrsGraph::lclInd");
4309  if (verbose) {
4310  std::ostringstream os;
4311  os << *prefix << "(Re)allocate lclInd_wdv: old="
4312  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4313  std::cerr << os.str();
4314  }
4315 
4316  local_inds_dualv_type lclInds_dualv =
4317  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4318  numEnt);
4319  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4320 
4321  auto lclColMap = colMap.getLocalMap ();
4322  // This is a "device mirror" of the host View h_numRowEnt.
4323  //
4324  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4325  // Device instance is to use its default constructor. See the
4326  // following Kokkos issue:
4327  //
4328  // https://github.com/kokkos/kokkos/issues/442
4329  if (verbose) {
4330  std::ostringstream os;
4331  os << *prefix << "Allocate device mirror k_numRowEnt: "
4332  << h_numRowEnt.extent(0) << endl;
4333  std::cerr << os.str();
4334  }
4335  auto k_numRowEnt =
4336  Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4337 
4339  lclNumErrs =
4340  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4341  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4342  gblInds_wdv.getDeviceView(Access::ReadOnly),
4343  this->getRowPtrsUnpackedDevice(),
4344  lclColMap,
4345  k_numRowEnt);
4346  if (lclNumErrs != 0) {
4347  const int myRank = [this] () {
4348  auto map = this->getMap ();
4349  if (map.is_null ()) {
4350  return 0;
4351  }
4352  else {
4353  auto comm = map->getComm ();
4354  return comm.is_null () ? 0 : comm->getRank ();
4355  }
4356  } ();
4357  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4358  errStrm << "(Process " << myRank << ") When converting column "
4359  "indices from global to local, we encountered " << lclNumErrs
4360  << " ind" << (pluralNumErrs ? "ices" : "ex")
4361  << " that do" << (pluralNumErrs ? "es" : "")
4362  << " not live in the column Map on this process." << endl;
4363  }
4364 
4365  // We've converted column indices from global to local, so we
4366  // can deallocate the global column indices (which we know are
4367  // in 1-D storage, because the graph has static profile).
4368  if (verbose) {
4369  std::ostringstream os;
4370  os << *prefix << "Free gblInds_wdv: "
4371  << gblInds_wdv.extent(0) << endl;
4372  std::cerr << os.str();
4373  }
4374  gblInds_wdv = global_inds_wdv_type ();
4375  } // globallyIndexed() && lclNumRows > 0
4376 
4377  this->indicesAreLocal_ = true;
4378  this->indicesAreGlobal_ = false;
4379  this->checkInternalState ();
4380 
4381  return std::make_pair (lclNumErrs, errStrm.str ());
4382  }
4383 
4384  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4385  void
4387  makeColMap (Teuchos::Array<int>& remotePIDs)
4388  {
4390  using std::endl;
4391  const char tfecfFuncName[] = "makeColMap";
4392 
4393  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4394  std::unique_ptr<std::string> prefix;
4395  if (verbose_) {
4396  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4397  std::ostringstream os;
4398  os << *prefix << "Start" << endl;
4399  std::cerr << os.str();
4400  }
4401 
4402  // this->colMap_ should be null at this point, but we accept the
4403  // future possibility that it might not be (esp. if we decide
4404  // later to support graph structure changes after first
4405  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4406  // 2017) support).
4407  Teuchos::RCP<const map_type> colMap = this->colMap_;
4408  const bool sortEachProcsGids =
4409  this->sortGhostsAssociatedWithEachProcessor_;
4410 
4411  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4412  // per-process error code. If an error does occur on a process,
4413  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4414  // notice that error. This is the caller's responsibility. For
4415  // now, we only propagate (to all processes) and report the error
4416  // in debug mode. In the future, we need to add the local/global
4417  // error handling scheme used in BlockCrsMatrix to this class.
4418  if (debug_) {
4419  using Teuchos::outArg;
4420  using Teuchos::REDUCE_MIN;
4421  using Teuchos::reduceAll;
4422 
4423  std::ostringstream errStrm;
4424  const int lclErrCode =
4425  Details::makeColMap (colMap, remotePIDs,
4426  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4427  auto comm = this->getComm ();
4428  if (! comm.is_null ()) {
4429  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4430  int gblSuccess = 0; // output argument
4431  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4432  outArg (gblSuccess));
4433  if (gblSuccess != 1) {
4434  std::ostringstream os;
4435  Details::gathervPrint (os, errStrm.str (), *comm);
4436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4437  (true, std::runtime_error, ": An error happened on at "
4438  "least one process in the CrsGraph's communicator. "
4439  "Here are all processes' error messages:" << std::endl
4440  << os.str ());
4441  }
4442  }
4443  }
4444  else {
4445  (void) Details::makeColMap (colMap, remotePIDs,
4446  getDomainMap (), *this, sortEachProcsGids, nullptr);
4447  }
4448  // See above. We want to admit the possibility of makeColMap
4449  // actually revising an existing column Map, even though that
4450  // doesn't currently (as of 10 May 2017) happen.
4451  this->colMap_ = colMap;
4452 
4453  checkInternalState ();
4454  if (verbose_) {
4455  std::ostringstream os;
4456  os << *prefix << "Done" << endl;
4457  std::cerr << os.str();
4458  }
4459  }
4460 
4461 
4462  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4463  void
4465  sortAndMergeAllIndices (const bool sorted, const bool merged)
4466  {
4467  using std::endl;
4468  using LO = LocalOrdinal;
4469  using host_execution_space =
4470  typename Kokkos::View<LO*, device_type>::HostMirror::
4471  execution_space;
4472  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4473  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4474  Details::ProfilingRegion regionSortAndMerge
4475  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4476 
4477  std::unique_ptr<std::string> prefix;
4478  if (verbose_) {
4479  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4480  std::ostringstream os;
4481  os << *prefix << "Start: "
4482  << "sorted=" << (sorted ? "true" : "false")
4483  << ", merged=" << (merged ? "true" : "false") << endl;
4484  std::cerr << os.str();
4485  }
4486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4487  (this->isGloballyIndexed(), std::logic_error,
4488  "This method may only be called after makeIndicesLocal." );
4489  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4490  (! merged && this->isStorageOptimized(), std::logic_error,
4491  "The graph is already storage optimized, so we shouldn't be "
4492  "merging any indices. "
4493  "Please report this bug to the Tpetra developers.");
4494 
4495  if (! sorted || ! merged) {
4496  const LO lclNumRows(this->getLocalNumRows());
4497  auto range = range_type(0, lclNumRows);
4498 
4499  if (verbose_) {
4500  size_t totalNumDups = 0;
4501  //Sync and mark-modified the local indices before disabling WDV tracking
4502  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4504  Kokkos::parallel_reduce(range,
4505  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4506  {
4507  const RowInfo rowInfo = this->getRowInfo(lclRow);
4508  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4509  },
4510  totalNumDups);
4512  std::ostringstream os;
4513  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4514  std::cerr << os.str();
4515  }
4516  else {
4517  //Sync and mark-modified the local indices before disabling WDV tracking
4518  lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4520  Kokkos::parallel_for(range,
4521  [this, sorted, merged] (const LO lclRow)
4522  {
4523  const RowInfo rowInfo = this->getRowInfo(lclRow);
4524  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4525  });
4527  }
4528  this->indicesAreSorted_ = true; // we just sorted every row
4529  this->noRedundancies_ = true; // we just merged every row
4530  }
4531 
4532  if (verbose_) {
4533  std::ostringstream os;
4534  os << *prefix << "Done" << endl;
4535  std::cerr << os.str();
4536  }
4537  }
4538 
4539  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4540  void
4542  makeImportExport (Teuchos::Array<int>& remotePIDs,
4543  const bool useRemotePIDs)
4544  {
4545  using ::Tpetra::Details::ProfilingRegion;
4546  using Teuchos::ParameterList;
4547  using Teuchos::RCP;
4548  using Teuchos::rcp;
4549  const char tfecfFuncName[] = "makeImportExport: ";
4550  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4551 
4552  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4553  (! this->hasColMap (), std::logic_error,
4554  "This method may not be called unless the graph has a column Map.");
4555  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4556 
4557  // Don't do any checks to see if we need to create the Import, if
4558  // it exists already.
4559  //
4560  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4561  // change CrsGraph in the future to allow changing the column
4562  // Map after fillComplete. For now, the column Map is fixed
4563  // after the first fillComplete call.
4564  if (importer_.is_null ()) {
4565  // Create the Import instance if necessary.
4566  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4567  if (params.is_null () || ! params->isSublist ("Import")) {
4568  if (useRemotePIDs) {
4569  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4570  }
4571  else {
4572  importer_ = rcp (new import_type (domainMap_, colMap_));
4573  }
4574  }
4575  else {
4576  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4577  if (useRemotePIDs) {
4578  RCP<import_type> newImp =
4579  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4580  importSublist));
4581  importer_ = newImp;
4582  }
4583  else {
4584  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4585  }
4586  }
4587  }
4588  }
4589 
4590  // Don't do any checks to see if we need to create the Export, if
4591  // it exists already.
4592  if (exporter_.is_null ()) {
4593  // Create the Export instance if necessary.
4594  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4595  if (params.is_null () || ! params->isSublist ("Export")) {
4596  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4597  }
4598  else {
4599  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4600  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4601  }
4602  }
4603  }
4604  }
4605 
4606 
4607  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4608  std::string
4611  {
4612  std::ostringstream oss;
4613  oss << dist_object_type::description ();
4614  if (isFillComplete ()) {
4615  oss << "{status = fill complete"
4616  << ", global rows = " << getGlobalNumRows()
4617  << ", global cols = " << getGlobalNumCols()
4618  << ", global num entries = " << getGlobalNumEntries()
4619  << "}";
4620  }
4621  else {
4622  oss << "{status = fill not complete"
4623  << ", global rows = " << getGlobalNumRows()
4624  << "}";
4625  }
4626  return oss.str();
4627  }
4628 
4629 
4630  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4631  void
4633  describe (Teuchos::FancyOStream &out,
4634  const Teuchos::EVerbosityLevel verbLevel) const
4635  {
4636  using Teuchos::ArrayView;
4637  using Teuchos::Comm;
4638  using Teuchos::RCP;
4639  using Teuchos::VERB_DEFAULT;
4640  using Teuchos::VERB_NONE;
4641  using Teuchos::VERB_LOW;
4642  using Teuchos::VERB_MEDIUM;
4643  using Teuchos::VERB_HIGH;
4644  using Teuchos::VERB_EXTREME;
4645  using std::endl;
4646  using std::setw;
4647 
4648  Teuchos::EVerbosityLevel vl = verbLevel;
4649  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4650  RCP<const Comm<int> > comm = this->getComm();
4651  const int myImageID = comm->getRank(),
4652  numImages = comm->getSize();
4653  size_t width = 1;
4654  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4655  ++width;
4656  }
4657  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4658  Teuchos::OSTab tab (out);
4659  // none: print nothing
4660  // low: print O(1) info from node 0
4661  // medium: print O(P) info, num entries per node
4662  // high: print O(N) info, num entries per row
4663  // extreme: print O(NNZ) info: print graph indices
4664  //
4665  // for medium and higher, print constituent objects at specified verbLevel
4666  if (vl != VERB_NONE) {
4667  if (myImageID == 0) out << this->description() << std::endl;
4668  // O(1) globals, minus what was already printed by description()
4669  if (isFillComplete() && myImageID == 0) {
4670  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4671  }
4672  // constituent objects
4673  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4674  if (myImageID == 0) out << "\nRow map: " << std::endl;
4675  rowMap_->describe(out,vl);
4676  if (colMap_ != Teuchos::null) {
4677  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4678  colMap_->describe(out,vl);
4679  }
4680  if (domainMap_ != Teuchos::null) {
4681  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4682  domainMap_->describe(out,vl);
4683  }
4684  if (rangeMap_ != Teuchos::null) {
4685  if (myImageID == 0) out << "\nRange map: " << std::endl;
4686  rangeMap_->describe(out,vl);
4687  }
4688  }
4689  // O(P) data
4690  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4691  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4692  if (myImageID == imageCtr) {
4693  out << "Node ID = " << imageCtr << std::endl
4694  << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4695  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4696  if (! indicesAreAllocated ()) {
4697  out << "Indices are not allocated." << std::endl;
4698  }
4699  }
4700  comm->barrier();
4701  comm->barrier();
4702  comm->barrier();
4703  }
4704  }
4705  // O(N) and O(NNZ) data
4706  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4707  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4708  if (myImageID == imageCtr) {
4709  out << std::setw(width) << "Node ID"
4710  << std::setw(width) << "Global Row"
4711  << std::setw(width) << "Num Entries";
4712  if (vl == VERB_EXTREME) {
4713  out << " Entries";
4714  }
4715  out << std::endl;
4716  const LocalOrdinal lclNumRows =
4717  static_cast<LocalOrdinal> (this->getLocalNumRows ());
4718  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4719  const RowInfo rowinfo = this->getRowInfo (r);
4720  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4721  out << std::setw(width) << myImageID
4722  << std::setw(width) << gid
4723  << std::setw(width) << rowinfo.numEntries;
4724  if (vl == VERB_EXTREME) {
4725  out << " ";
4726  if (isGloballyIndexed()) {
4727  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4728  for (size_t j=0; j < rowinfo.numEntries; ++j){
4729  GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4730  out << colgid << " ";
4731  }
4732  }
4733  else if (isLocallyIndexed()) {
4734  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4735  for (size_t j=0; j < rowinfo.numEntries; ++j) {
4736  LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4737  out << colMap_->getGlobalElement(collid) << " ";
4738  }
4739  }
4740  }
4741  out << std::endl;
4742  }
4743  }
4744  comm->barrier();
4745  comm->barrier();
4746  comm->barrier();
4747  }
4748  }
4749  }
4750  }
4751 
4752 
4753  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4754  bool
4756  checkSizes (const SrcDistObject& /* source */)
4757  {
4758  // It's not clear what kind of compatibility checks on sizes can
4759  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4760  // compatibility.
4761  return true;
4762  }
4763 
4764  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4765  void
4768  (const SrcDistObject& source,
4769  const size_t numSameIDs,
4770  const Kokkos::DualView<const local_ordinal_type*,
4771  buffer_device_type>& permuteToLIDs,
4772  const Kokkos::DualView<const local_ordinal_type*,
4773  buffer_device_type>& permuteFromLIDs,
4774  const CombineMode /*CM*/)
4775  {
4776  using std::endl;
4777  using LO = local_ordinal_type;
4778  using GO = global_ordinal_type;
4779  using this_CRS_type = CrsGraph<LO, GO, node_type>;
4780  const char tfecfFuncName[] = "copyAndPermute: ";
4781  const bool verbose = verbose_;
4782 
4783  std::unique_ptr<std::string> prefix;
4784  if (verbose) {
4785  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4786  std::ostringstream os;
4787  os << *prefix << endl;
4788  std::cerr << os.str ();
4789  }
4790 
4791  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4792  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4793  std::runtime_error, "permuteToLIDs.extent(0) = "
4794  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4795  << permuteFromLIDs.extent (0) << ".");
4796 
4797  // We know from checkSizes that the source object is a
4798  // row_graph_type, so we don't need to check again.
4799  const row_graph_type& srcRowGraph =
4800  dynamic_cast<const row_graph_type&> (source);
4801 
4802  if (verbose) {
4803  std::ostringstream os;
4804  os << *prefix << "Compute padding" << endl;
4805  std::cerr << os.str ();
4806  }
4807  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4808  permuteToLIDs, permuteFromLIDs, verbose);
4809  applyCrsPadding(*padding, verbose);
4810 
4811  // If the source object is actually a CrsGraph, we can use view
4812  // mode instead of copy mode to access the entries in each row,
4813  // if the graph is not fill complete.
4814  const this_CRS_type* srcCrsGraph =
4815  dynamic_cast<const this_CRS_type*> (&source);
4816 
4817  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4818  const map_type& tgtRowMap = *(getRowMap());
4819  const bool src_filled = srcRowGraph.isFillComplete();
4820  nonconst_global_inds_host_view_type row_copy;
4821  LO myid = 0;
4822 
4823  //
4824  // "Copy" part of "copy and permute."
4825  //
4826  if (src_filled || srcCrsGraph == nullptr) {
4827  if (verbose) {
4828  std::ostringstream os;
4829  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4830  std::cerr << os.str ();
4831  }
4832  // If the source graph is fill complete, we can't use view mode,
4833  // because the data might be stored in a different format not
4834  // compatible with the expectations of view mode. Also, if the
4835  // source graph is not a CrsGraph, we can't use view mode,
4836  // because RowGraph only provides copy mode access to the data.
4837  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4838  const GO gid = srcRowMap.getGlobalElement (myid);
4839  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4840  Kokkos::resize(row_copy,row_length);
4841  size_t check_row_length = 0;
4842  srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4843  this->insertGlobalIndices (gid, row_length, row_copy.data());
4844  }
4845  } else {
4846  if (verbose) {
4847  std::ostringstream os;
4848  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4849  std::cerr << os.str ();
4850  }
4851  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4852  const GO gid = srcRowMap.getGlobalElement (myid);
4853  global_inds_host_view_type row;
4854  srcCrsGraph->getGlobalRowView (gid, row);
4855  this->insertGlobalIndices (gid, row.extent(0), row.data());
4856  }
4857  }
4858 
4859  //
4860  // "Permute" part of "copy and permute."
4861  //
4862  auto permuteToLIDs_h = permuteToLIDs.view_host ();
4863  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4864 
4865  if (src_filled || srcCrsGraph == nullptr) {
4866  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4867  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4868  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4869  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4870  Kokkos::resize(row_copy,row_length);
4871  size_t check_row_length = 0;
4872  srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4873  this->insertGlobalIndices (mygid, row_length, row_copy.data());
4874  }
4875  } else {
4876  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4877  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4878  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4879  global_inds_host_view_type row;
4880  srcCrsGraph->getGlobalRowView (srcgid, row);
4881  this->insertGlobalIndices (mygid, row.extent(0), row.data());
4882  }
4883  }
4884 
4885  if (verbose) {
4886  std::ostringstream os;
4887  os << *prefix << "Done" << endl;
4888  std::cerr << os.str ();
4889  }
4890  }
4891 
4892  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4893  void
4895  applyCrsPadding(const padding_type& padding,
4896  const bool verbose)
4897  {
4899  using Details::padCrsArrays;
4900  using std::endl;
4901  using LO = local_ordinal_type;
4902  using row_ptrs_type =
4903  typename local_graph_device_type::row_map_type::non_const_type;
4904  using range_policy =
4905  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4906  const char tfecfFuncName[] = "applyCrsPadding";
4907  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4908 
4909  std::unique_ptr<std::string> prefix;
4910  if (verbose) {
4911  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4912  std::ostringstream os;
4913  os << *prefix << "padding: ";
4914  padding.print(os);
4915  os << endl;
4916  std::cerr << os.str();
4917  }
4918  const int myRank = ! verbose ? -1 : [&] () {
4919  auto map = this->getMap();
4920  if (map.is_null()) {
4921  return -1;
4922  }
4923  auto comm = map->getComm();
4924  if (comm.is_null()) {
4925  return -1;
4926  }
4927  return comm->getRank();
4928  } ();
4929 
4930  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4931  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4932  // size needs to increase. That should be the job of
4933  // padCrsArrays.
4934 
4935  // Assume global indexing we don't have any indices yet
4936  if (! indicesAreAllocated()) {
4937  if (verbose) {
4938  std::ostringstream os;
4939  os << *prefix << "Call allocateIndices" << endl;
4940  std::cerr << os.str();
4941  }
4942  allocateIndices(GlobalIndices, verbose);
4943  }
4944  TEUCHOS_ASSERT( indicesAreAllocated() );
4945 
4946  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4947  // would use it directly.
4948 
4949  auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4950  if (verbose) {
4951  std::ostringstream os;
4952  os << *prefix << "Allocate row_ptrs_beg: "
4953  << rowPtrsUnpacked_dev.extent(0) << endl;
4954  std::cerr << os.str();
4955  }
4956  using Kokkos::view_alloc;
4957  using Kokkos::WithoutInitializing;
4958  row_ptrs_type row_ptrs_beg(
4959  view_alloc("row_ptrs_beg", WithoutInitializing),
4960  rowPtrsUnpacked_dev.extent(0));
4961  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4962  Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev);
4963 
4964  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4965  size_t(row_ptrs_beg.extent(0) - 1);
4966  if (verbose) {
4967  std::ostringstream os;
4968  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4969  std::cerr << os.str();
4970  }
4971  row_ptrs_type row_ptrs_end(
4972  view_alloc("row_ptrs_end", WithoutInitializing), N);
4973  row_ptrs_type num_row_entries;
4974 
4975  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4976 
4977  execution_space().fence(); // we need above deep_copy to be done
4978 
4979  if (refill_num_row_entries) { // Case 1: Unpacked storage
4980  // We can't assume correct *this capture until C++17, and it's
4981  // likely more efficient just to capture what we need anyway.
4982  num_row_entries =
4983  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4984  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4985  Kokkos::parallel_for
4986  ("Fill end row pointers", range_policy(0, N),
4987  KOKKOS_LAMBDA (const size_t i) {
4988  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4989  });
4990  }
4991  else {
4992  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4993  // storage, we don't need row_ptr_end to be separate allocation;
4994  // could just have it alias row_ptr_beg+1.
4995  Kokkos::parallel_for
4996  ("Fill end row pointers", range_policy(0, N),
4997  KOKKOS_LAMBDA (const size_t i) {
4998  row_ptrs_end(i) = row_ptrs_beg(i+1);
4999  });
5000  }
5001 
5002  if (isGloballyIndexed()) {
5003  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5004  padding, myRank, verbose);
5005  }
5006  else {
5007  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5008  padding, myRank, verbose);
5009  }
5010 
5011  if (refill_num_row_entries) {
5012  Kokkos::parallel_for
5013  ("Fill num entries", range_policy(0, N),
5014  KOKKOS_LAMBDA (const size_t i) {
5015  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5016  });
5017  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5018  }
5019  if (verbose) {
5020  std::ostringstream os;
5021  os << *prefix << "Reassign k_rowPtrs_; old size: "
5022  << rowPtrsUnpacked_dev.extent(0) << ", new size: "
5023  << row_ptrs_beg.extent(0) << endl;
5024  std::cerr << os.str();
5025  TEUCHOS_ASSERT( rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0) );
5026  }
5027 
5028  setRowPtrsUnpacked(row_ptrs_beg);
5029  }
5030 
5031  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5032  std::unique_ptr<
5033  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5034  >
5035  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5036  computeCrsPadding(
5037  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5038  const size_t numSameIDs,
5039  const Kokkos::DualView<const local_ordinal_type*,
5040  buffer_device_type>& permuteToLIDs,
5041  const Kokkos::DualView<const local_ordinal_type*,
5042  buffer_device_type>& permuteFromLIDs,
5043  const bool verbose) const
5044  {
5045  using LO = local_ordinal_type;
5046  using std::endl;
5047 
5048  std::unique_ptr<std::string> prefix;
5049  if (verbose) {
5050  prefix = this->createPrefix("CrsGraph",
5051  "computeCrsPadding(same & permute)");
5052  std::ostringstream os;
5053  os << *prefix << "{numSameIDs: " << numSameIDs
5054  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5055  << endl;
5056  std::cerr << os.str();
5057  }
5058 
5059  const int myRank = [&] () {
5060  auto comm = rowMap_.is_null() ? Teuchos::null :
5061  rowMap_->getComm();
5062  return comm.is_null() ? -1 : comm->getRank();
5063  } ();
5064  std::unique_ptr<padding_type> padding(
5065  new padding_type(myRank, numSameIDs,
5066  permuteFromLIDs.extent(0)));
5067 
5068  computeCrsPaddingForSameIDs(*padding, source,
5069  static_cast<LO>(numSameIDs));
5070  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5071  permuteFromLIDs);
5072  return padding;
5073  }
5074 
5075  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5076  void
5077  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5078  computeCrsPaddingForSameIDs(
5079  padding_type& padding,
5080  const RowGraph<local_ordinal_type, global_ordinal_type,
5081  node_type>& source,
5082  const local_ordinal_type numSameIDs) const
5083  {
5084  using LO = local_ordinal_type;
5085  using GO = global_ordinal_type;
5086  using Details::Impl::getRowGraphGlobalRow;
5087  using std::endl;
5088  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5089 
5090  std::unique_ptr<std::string> prefix;
5091  const bool verbose = verbose_;
5092  if (verbose) {
5093  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5094  std::ostringstream os;
5095  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5096  std::cerr << os.str();
5097  }
5098 
5099  if (numSameIDs == 0) {
5100  return;
5101  }
5102 
5103  const map_type& srcRowMap = *(source.getRowMap());
5104  const map_type& tgtRowMap = *rowMap_;
5105  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5106  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5107  const bool src_is_unique =
5108  srcCrs == nullptr ? false : srcCrs->isMerged();
5109  const bool tgt_is_unique = this->isMerged();
5110 
5111  std::vector<GO> srcGblColIndsScratch;
5112  std::vector<GO> tgtGblColIndsScratch;
5113 
5114  execute_sync_host_uvm_access(); // protect host UVM access
5115  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5116  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5117  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5118  auto srcGblColInds = getRowGraphGlobalRow(
5119  srcGblColIndsScratch, source, srcGblRowInd);
5120  auto tgtGblColInds = getRowGraphGlobalRow(
5121  tgtGblColIndsScratch, *this, tgtGblRowInd);
5122  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5123  tgtGblColInds.size(), tgt_is_unique,
5124  srcGblColInds.getRawPtr(),
5125  srcGblColInds.size(), src_is_unique);
5126  }
5127  if (verbose) {
5128  std::ostringstream os;
5129  os << *prefix << "Done" << endl;
5130  std::cerr << os.str();
5131  }
5132  }
5133 
5134  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5135  void
5136  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5137  computeCrsPaddingForPermutedIDs(
5138  padding_type& padding,
5139  const RowGraph<local_ordinal_type, global_ordinal_type,
5140  node_type>& source,
5141  const Kokkos::DualView<const local_ordinal_type*,
5142  buffer_device_type>& permuteToLIDs,
5143  const Kokkos::DualView<const local_ordinal_type*,
5144  buffer_device_type>& permuteFromLIDs) const
5145  {
5146  using LO = local_ordinal_type;
5147  using GO = global_ordinal_type;
5148  using Details::Impl::getRowGraphGlobalRow;
5149  using std::endl;
5150  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5151 
5152  std::unique_ptr<std::string> prefix;
5153  const bool verbose = verbose_;
5154  if (verbose) {
5155  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5156  std::ostringstream os;
5157  os << *prefix << "permuteToLIDs.extent(0): "
5158  << permuteToLIDs.extent(0)
5159  << ", permuteFromLIDs.extent(0): "
5160  << permuteFromLIDs.extent(0) << endl;
5161  std::cerr << os.str();
5162  }
5163 
5164  if (permuteToLIDs.extent(0) == 0) {
5165  return;
5166  }
5167 
5168  const map_type& srcRowMap = *(source.getRowMap());
5169  const map_type& tgtRowMap = *rowMap_;
5170  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5171  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5172  const bool src_is_unique =
5173  srcCrs == nullptr ? false : srcCrs->isMerged();
5174  const bool tgt_is_unique = this->isMerged();
5175 
5176  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5177  auto permuteToLIDs_h = permuteToLIDs.view_host();
5178  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5179  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5180 
5181  std::vector<GO> srcGblColIndsScratch;
5182  std::vector<GO> tgtGblColIndsScratch;
5183  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5184 
5185  execute_sync_host_uvm_access(); // protect host UVM access
5186  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5187  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5188  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5189  auto srcGblColInds = getRowGraphGlobalRow(
5190  srcGblColIndsScratch, source, srcGblRowInd);
5191  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5192  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5193  auto tgtGblColInds = getRowGraphGlobalRow(
5194  tgtGblColIndsScratch, *this, tgtGblRowInd);
5195  padding.update_permute(whichPermute, tgtLclRowInd,
5196  tgtGblColInds.getRawPtr(),
5197  tgtGblColInds.size(), tgt_is_unique,
5198  srcGblColInds.getRawPtr(),
5199  srcGblColInds.size(), src_is_unique);
5200  }
5201 
5202  if (verbose) {
5203  std::ostringstream os;
5204  os << *prefix << "Done" << endl;
5205  std::cerr << os.str();
5206  }
5207  }
5208 
5209  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5210  std::unique_ptr<
5211  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5212  >
5213  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5214  computeCrsPaddingForImports(
5215  const Kokkos::DualView<const local_ordinal_type*,
5216  buffer_device_type>& importLIDs,
5217  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5218  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5219  const bool verbose) const
5220  {
5221  using Details::Impl::getRowGraphGlobalRow;
5222  using std::endl;
5223  using LO = local_ordinal_type;
5224  using GO = global_ordinal_type;
5225  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5226 
5227  std::unique_ptr<std::string> prefix;
5228  if (verbose) {
5229  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5230  std::ostringstream os;
5231  os << *prefix << "importLIDs.extent(0): "
5232  << importLIDs.extent(0)
5233  << ", imports.extent(0): "
5234  << imports.extent(0)
5235  << ", numPacketsPerLID.extent(0): "
5236  << numPacketsPerLID.extent(0) << endl;
5237  std::cerr << os.str();
5238  }
5239 
5240  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5241  const int myRank = [&] () {
5242  auto comm = rowMap_.is_null() ? Teuchos::null :
5243  rowMap_->getComm();
5244  return comm.is_null() ? -1 : comm->getRank();
5245  } ();
5246  std::unique_ptr<padding_type> padding(
5247  new padding_type(myRank, numImports));
5248 
5249  if (imports.need_sync_host()) {
5250  imports.sync_host();
5251  }
5252  auto imports_h = imports.view_host();
5253  if (numPacketsPerLID.need_sync_host ()) {
5254  numPacketsPerLID.sync_host();
5255  }
5256  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5257 
5258  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5259  auto importLIDs_h = importLIDs.view_host();
5260 
5261  const map_type& tgtRowMap = *rowMap_;
5262  // Always merge source column indices, since isMerged() is
5263  // per-process state, and we don't know its value on other
5264  // processes that sent us data.
5265  constexpr bool src_is_unique = false;
5266  const bool tgt_is_unique = isMerged();
5267 
5268  std::vector<GO> tgtGblColIndsScratch;
5269  size_t offset = 0;
5270  execute_sync_host_uvm_access(); // protect host UVM access
5271  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5272  // CrsGraph packs just global column indices, while CrsMatrix
5273  // packs bytes (first the number of entries in the row, then the
5274  // global column indices, then other stuff like the matrix
5275  // values in that row).
5276  const LO origSrcNumEnt =
5277  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5278  GO* const srcGblColInds = imports_h.data() + offset;
5279 
5280  const LO tgtLclRowInd = importLIDs_h[whichImport];
5281  const GO tgtGblRowInd =
5282  tgtRowMap.getGlobalElement(tgtLclRowInd);
5283  auto tgtGblColInds = getRowGraphGlobalRow(
5284  tgtGblColIndsScratch, *this, tgtGblRowInd);
5285  const size_t origTgtNumEnt(tgtGblColInds.size());
5286 
5287  padding->update_import(whichImport, tgtLclRowInd,
5288  tgtGblColInds.getRawPtr(),
5289  origTgtNumEnt, tgt_is_unique,
5290  srcGblColInds,
5291  origSrcNumEnt, src_is_unique);
5292  offset += origSrcNumEnt;
5293  }
5294 
5295  if (verbose) {
5296  std::ostringstream os;
5297  os << *prefix << "Done" << endl;
5298  std::cerr << os.str();
5299  }
5300  return padding;
5301  }
5302 
5303  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5304  std::unique_ptr<
5305  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5306  >
5307  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5308  computePaddingForCrsMatrixUnpack(
5309  const Kokkos::DualView<const local_ordinal_type*,
5310  buffer_device_type>& importLIDs,
5311  Kokkos::DualView<char*, buffer_device_type> imports,
5312  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5313  const bool verbose) const
5314  {
5315  using Details::Impl::getRowGraphGlobalRow;
5316  using Details::PackTraits;
5317  using std::endl;
5318  using LO = local_ordinal_type;
5319  using GO = global_ordinal_type;
5320  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5321 
5322  std::unique_ptr<std::string> prefix;
5323  if (verbose) {
5324  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5325  std::ostringstream os;
5326  os << *prefix << "importLIDs.extent(0): "
5327  << importLIDs.extent(0)
5328  << ", imports.extent(0): "
5329  << imports.extent(0)
5330  << ", numPacketsPerLID.extent(0): "
5331  << numPacketsPerLID.extent(0) << endl;
5332  std::cerr << os.str();
5333  }
5334  const bool extraVerbose =
5335  verbose && Details::Behavior::verbose("CrsPadding");
5336 
5337  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5338  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5339  const int myRank = [&] () {
5340  auto comm = rowMap_.is_null() ? Teuchos::null :
5341  rowMap_->getComm();
5342  return comm.is_null() ? -1 : comm->getRank();
5343  } ();
5344  std::unique_ptr<padding_type> padding(
5345  new padding_type(myRank, numImports));
5346 
5347  if (imports.need_sync_host()) {
5348  imports.sync_host();
5349  }
5350  auto imports_h = imports.view_host();
5351  if (numPacketsPerLID.need_sync_host ()) {
5352  numPacketsPerLID.sync_host();
5353  }
5354  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5355 
5356  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5357  auto importLIDs_h = importLIDs.view_host();
5358 
5359  const map_type& tgtRowMap = *rowMap_;
5360  // Always merge source column indices, since isMerged() is
5361  // per-process state, and we don't know its value on other
5362  // processes that sent us data.
5363  constexpr bool src_is_unique = false;
5364  const bool tgt_is_unique = isMerged();
5365 
5366  std::vector<GO> srcGblColIndsScratch;
5367  std::vector<GO> tgtGblColIndsScratch;
5368  size_t offset = 0;
5369  execute_sync_host_uvm_access(); // protect host UVM access
5370  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5371  // CrsGraph packs just global column indices, while CrsMatrix
5372  // packs bytes (first the number of entries in the row, then the
5373  // global column indices, then other stuff like the matrix
5374  // values in that row).
5375  const size_t numBytes = numPacketsPerLID_h[whichImport];
5376  if (extraVerbose) {
5377  std::ostringstream os;
5378  os << *prefix << "whichImport=" << whichImport
5379  << ", numImports=" << numImports
5380  << ", numBytes=" << numBytes << endl;
5381  std::cerr << os.str();
5382  }
5383  if (numBytes == 0) {
5384  continue; // special case: no entries to unpack for this row
5385  }
5386  LO origSrcNumEnt = 0;
5387  const size_t numEntBeg = offset;
5388  const size_t numEntLen =
5389  PackTraits<LO>::packValueCount(origSrcNumEnt);
5390  TEUCHOS_ASSERT( numBytes >= numEntLen );
5391  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5392  PackTraits<LO>::unpackValue(origSrcNumEnt,
5393  imports_h.data() + numEntBeg);
5394  if (extraVerbose) {
5395  std::ostringstream os;
5396  os << *prefix << "whichImport=" << whichImport
5397  << ", numImports=" << numImports
5398  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5399  std::cerr << os.str();
5400  }
5401  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5402  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5403  const size_t gidsBeg = numEntBeg + numEntLen;
5404  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5405  srcGblColIndsScratch.resize(origSrcNumEnt);
5406  }
5407  GO* const srcGblColInds = srcGblColIndsScratch.data();
5408  PackTraits<GO>::unpackArray(srcGblColInds,
5409  imports_h.data() + gidsBeg,
5410  origSrcNumEnt);
5411  const LO tgtLclRowInd = importLIDs_h[whichImport];
5412  const GO tgtGblRowInd =
5413  tgtRowMap.getGlobalElement(tgtLclRowInd);
5414  auto tgtGblColInds = getRowGraphGlobalRow(
5415  tgtGblColIndsScratch, *this, tgtGblRowInd);
5416  const size_t origNumTgtEnt(tgtGblColInds.size());
5417 
5418  if (extraVerbose) {
5419  std::ostringstream os;
5420  os << *prefix << "whichImport=" << whichImport
5421  << ", numImports=" << numImports
5422  << ": Call padding->update_import" << endl;
5423  std::cerr << os.str();
5424  }
5425  padding->update_import(whichImport, tgtLclRowInd,
5426  tgtGblColInds.getRawPtr(),
5427  origNumTgtEnt, tgt_is_unique,
5428  srcGblColInds,
5429  origSrcNumEnt, src_is_unique);
5430  offset += numBytes;
5431  }
5432 
5433  if (verbose) {
5434  std::ostringstream os;
5435  os << *prefix << "Done" << endl;
5436  std::cerr << os.str();
5437  }
5438  return padding;
5439  }
5440 
5441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5442  void
5443  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5444  packAndPrepare
5445  (const SrcDistObject& source,
5446  const Kokkos::DualView<const local_ordinal_type*,
5447  buffer_device_type>& exportLIDs,
5448  Kokkos::DualView<packet_type*,
5449  buffer_device_type>& exports,
5450  Kokkos::DualView<size_t*,
5451  buffer_device_type> numPacketsPerLID,
5452  size_t& constantNumPackets)
5453  {
5455  using GO = global_ordinal_type;
5456  using std::endl;
5457  using crs_graph_type =
5458  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5459  const char tfecfFuncName[] = "packAndPrepare: ";
5460  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5461 
5462  const bool verbose = verbose_;
5463  std::unique_ptr<std::string> prefix;
5464  if (verbose) {
5465  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5466  std::ostringstream os;
5467  os << *prefix << "Start" << endl;
5468  std::cerr << os.str();
5469  }
5470 
5471  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5472  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5473  std::runtime_error,
5474  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5475  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5476  << ".");
5477  const row_graph_type* srcRowGraphPtr =
5478  dynamic_cast<const row_graph_type*> (&source);
5479  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5480  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5481  "or Import operation to a CrsGraph must be a RowGraph with the same "
5482  "template parameters.");
5483  // We don't check whether src_graph has had fillComplete called,
5484  // because it doesn't matter whether the *source* graph has been
5485  // fillComplete'd. The target graph can not be fillComplete'd yet.
5486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5487  (this->isFillComplete (), std::runtime_error,
5488  "The target graph of an Import or Export must not be fill complete.");
5489 
5490  const crs_graph_type* srcCrsGraphPtr =
5491  dynamic_cast<const crs_graph_type*> (&source);
5492 
5493  if (srcCrsGraphPtr == nullptr) {
5494  using Teuchos::ArrayView;
5495  using LO = local_ordinal_type;
5496 
5497  if (verbose) {
5498  std::ostringstream os;
5499  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5500  << endl;
5501  std::cerr << os.str();
5502  }
5503  // RowGraph::pack serves the "old" DistObject interface. It
5504  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5505  // entails deep-copying the exports buffer on output. RowGraph
5506  // is a convenience interface when not a CrsGraph, so we accept
5507  // the performance hit.
5508  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5509  auto exportLIDs_h = exportLIDs.view_host ();
5510  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5511  exportLIDs_h.extent (0));
5512  Teuchos::Array<GO> exports_a;
5513 
5514  numPacketsPerLID.clear_sync_state ();
5515  numPacketsPerLID.modify_host ();
5516  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5517  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5518  numPacketsPerLID_h.extent (0));
5519  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5520  constantNumPackets);
5521  const size_t newSize = static_cast<size_t> (exports_a.size ());
5522  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5523  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5524  exports = exports_dv_type ("exports", newSize);
5525  }
5526  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5527  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5528  exports.clear_sync_state ();
5529  exports.modify_host ();
5530  // DEEP_COPY REVIEW - NOT TESTED
5531  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5532  }
5533  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5534  else if (! getColMap ().is_null () &&
5535  (this->getRowPtrsPackedDevice().extent (0) != 0 ||
5536  getRowMap ()->getLocalNumElements () == 0)) {
5537  if (verbose) {
5538  std::ostringstream os;
5539  os << *prefix << "packCrsGraphNew path" << endl;
5540  std::cerr << os.str();
5541  }
5542  using export_pids_type =
5543  Kokkos::DualView<const int*, buffer_device_type>;
5544  export_pids_type exportPIDs; // not filling it; needed for syntax
5545  using LO = local_ordinal_type;
5546  using NT = node_type;
5548  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5549  exports, numPacketsPerLID,
5550  constantNumPackets, false);
5551  }
5552  else {
5553  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5554  constantNumPackets);
5555  }
5556 
5557  if (verbose) {
5558  std::ostringstream os;
5559  os << *prefix << "Done" << endl;
5560  std::cerr << os.str();
5561  }
5562  }
5563 
5564  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5565  void
5567  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5568  Teuchos::Array<GlobalOrdinal>& exports,
5569  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5570  size_t& constantNumPackets) const
5571  {
5572  auto col_map = this->getColMap();
5573  // packCrsGraph requires k_rowPtrsPacked to be set
5574  if( !col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5576  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5577  exportLIDs, constantNumPackets);
5578  }
5579  else {
5580  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5581  constantNumPackets);
5582  }
5583  }
5584 
5585  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5586  void
5588  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5589  Teuchos::Array<GlobalOrdinal>& exports,
5590  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5591  size_t& constantNumPackets) const
5592  {
5593  using std::endl;
5594  using LO = LocalOrdinal;
5595  using GO = GlobalOrdinal;
5596  using host_execution_space =
5597  typename Kokkos::View<size_t*, device_type>::
5598  HostMirror::execution_space;
5599  const char tfecfFuncName[] = "packFillActive: ";
5600  const bool verbose = verbose_;
5601 
5602  const auto numExportLIDs = exportLIDs.size ();
5603  std::unique_ptr<std::string> prefix;
5604  if (verbose) {
5605  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5606  std::ostringstream os;
5607  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5608  std::cerr << os.str();
5609  }
5610  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5611  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5612  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5613  " = " << numPacketsPerLID.size () << ".");
5614 
5615  const map_type& rowMap = * (this->getRowMap ());
5616  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5618  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5619  "This graph claims to be locally indexed, but its column Map is nullptr. "
5620  "This should never happen. Please report this bug to the Tpetra "
5621  "developers.");
5622 
5623  // We may pack different amounts of data for different rows.
5624  constantNumPackets = 0;
5625 
5626  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5627  // it might be now, but we might as well be safe).
5628  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5629  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5630 
5631  // Count the total number of packets (column indices, in the case
5632  // of a CrsGraph) to pack. While doing so, set
5633  // numPacketsPerLID[i] to the number of entries owned by the
5634  // calling process in (local) row exportLIDs[i] of the graph, that
5635  // the caller wants us to send out.
5636  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5637  size_t totalNumPackets = 0;
5638  size_t errCount = 0;
5639  // lambdas turn what they capture const, so we can't
5640  // atomic_add(&errCount,1). Instead, we need a View to modify.
5641  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5642  host_device_type;
5643  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5644  constexpr size_t ONE = 1;
5645 
5646  execute_sync_host_uvm_access(); // protect host UVM access
5647  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5648  inputRange,
5649  [=] (const LO& i, size_t& curTotalNumPackets) {
5650  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5651  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5652  Kokkos::atomic_add (&errCountView(), ONE);
5653  numPacketsPerLID_raw[i] = 0;
5654  }
5655  else {
5656  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5657  numPacketsPerLID_raw[i] = numEnt;
5658  curTotalNumPackets += numEnt;
5659  }
5660  },
5661  totalNumPackets);
5662 
5663  if (verbose) {
5664  std::ostringstream os;
5665  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5666  std::cerr << os.str();
5667  }
5668  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5669  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5670  "one or more errors! errCount = " << errCount
5671  << ", totalNumPackets = " << totalNumPackets << ".");
5672  errCount = 0;
5673 
5674  // Allocate space for all the column indices to pack.
5675  exports.resize (totalNumPackets);
5676 
5677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5678  (! this->supportsRowViews (), std::logic_error,
5679  "this->supportsRowViews() returns false; this should never happen. "
5680  "Please report this bug to the Tpetra developers.");
5681 
5682  // Loop again over the rows to export, and pack rows of indices
5683  // into the output buffer.
5684 
5685  if (verbose) {
5686  std::ostringstream os;
5687  os << *prefix << "Pack into exports" << endl;
5688  std::cerr << os.str();
5689  }
5690 
5691  // Teuchos::ArrayView may not be thread safe, or may not be
5692  // efficiently thread safe. Better to use the raw pointer.
5693  GO* const exports_raw = exports.getRawPtr ();
5694  errCount = 0;
5695  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5696  inputRange, [=, &prefix]
5697  (const LO i, size_t& exportsOffset, const bool final) {
5698  const size_t curOffset = exportsOffset;
5699  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5700  const RowInfo rowInfo =
5701  this->getRowInfoFromGlobalRowIndex (gblRow);
5702 
5703  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5704  if (rowInfo.localRow == TDO::invalid ()) {
5705  if (verbose) {
5706  std::ostringstream os;
5707  os << *prefix << ": INVALID rowInfo: i=" << i
5708  << ", lclRow=" << exportLIDs_raw[i] << endl;
5709  std::cerr << os.str();
5710  }
5711  Kokkos::atomic_add (&errCountView(), ONE);
5712  }
5713  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5714  if (verbose) {
5715  std::ostringstream os;
5716  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5717  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5718  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5719  << ") > totalNumPackets (= " << totalNumPackets << ")."
5720  << endl;
5721  std::cerr << os.str();
5722  }
5723  Kokkos::atomic_add (&errCountView(), ONE);
5724  }
5725  else {
5726  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5727  if (this->isLocallyIndexed ()) {
5728  auto lclColInds = getLocalIndsViewHost (rowInfo);
5729  if (final) {
5730  for (LO k = 0; k < numEnt; ++k) {
5731  const LO lclColInd = lclColInds(k);
5732  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5733  // Pack it, even if it's wrong. Let the receiving
5734  // process deal with it. Otherwise, we'll miss out
5735  // on any correct data.
5736  exports_raw[curOffset + k] = gblColInd;
5737  } // for each entry in the row
5738  } // final pass?
5739  exportsOffset = curOffset + numEnt;
5740  }
5741  else if (this->isGloballyIndexed ()) {
5742  auto gblColInds = getGlobalIndsViewHost (rowInfo);
5743  if (final) {
5744  for (LO k = 0; k < numEnt; ++k) {
5745  const GO gblColInd = gblColInds(k);
5746  // Pack it, even if it's wrong. Let the receiving
5747  // process deal with it. Otherwise, we'll miss out
5748  // on any correct data.
5749  exports_raw[curOffset + k] = gblColInd;
5750  } // for each entry in the row
5751  } // final pass?
5752  exportsOffset = curOffset + numEnt;
5753  }
5754  // If neither globally nor locally indexed, then the graph
5755  // has no entries in this row (or indeed, in any row on this
5756  // process) to pack.
5757  }
5758  });
5759 
5760  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5761  (errCount != 0, std::logic_error, "Packing encountered "
5762  "one or more errors! errCount = " << errCount
5763  << ", totalNumPackets = " << totalNumPackets << ".");
5764 
5765  if (verbose) {
5766  std::ostringstream os;
5767  os << *prefix << "Done" << endl;
5768  std::cerr << os.str();
5769  }
5770  }
5771 
5772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5773  void
5774  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5775  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5776  buffer_device_type>& exportLIDs,
5777  Kokkos::DualView<packet_type*,
5778  buffer_device_type>& exports,
5779  Kokkos::DualView<size_t*,
5780  buffer_device_type> numPacketsPerLID,
5781  size_t& constantNumPackets) const
5782  {
5783  using std::endl;
5784  using LO = local_ordinal_type;
5785  using GO = global_ordinal_type;
5786  using host_execution_space = typename Kokkos::View<size_t*,
5787  device_type>::HostMirror::execution_space;
5788  using host_device_type =
5789  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5790  using exports_dv_type =
5791  Kokkos::DualView<packet_type*, buffer_device_type>;
5792  const char tfecfFuncName[] = "packFillActiveNew: ";
5793  const bool verbose = verbose_;
5794 
5795  const auto numExportLIDs = exportLIDs.extent (0);
5796  std::unique_ptr<std::string> prefix;
5797  if (verbose) {
5798  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5799  std::ostringstream os;
5800  os << *prefix << "numExportLIDs: " << numExportLIDs
5801  << ", numPacketsPerLID.extent(0): "
5802  << numPacketsPerLID.extent(0) << endl;
5803  std::cerr << os.str();
5804  }
5805  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5806  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5807  "exportLIDs.extent(0) = " << numExportLIDs
5808  << " != numPacketsPerLID.extent(0) = "
5809  << numPacketsPerLID.extent (0) << ".");
5810  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5811  auto exportLIDs_h = exportLIDs.view_host ();
5812 
5813  const map_type& rowMap = * (this->getRowMap ());
5814  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5815  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5816  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5817  "This graph claims to be locally indexed, but its column Map is nullptr. "
5818  "This should never happen. Please report this bug to the Tpetra "
5819  "developers.");
5820 
5821  // We may pack different amounts of data for different rows.
5822  constantNumPackets = 0;
5823 
5824  numPacketsPerLID.clear_sync_state ();
5825  numPacketsPerLID.modify_host ();
5826  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5827 
5828  // Count the total number of packets (column indices, in the case
5829  // of a CrsGraph) to pack. While doing so, set
5830  // numPacketsPerLID[i] to the number of entries owned by the
5831  // calling process in (local) row exportLIDs[i] of the graph, that
5832  // the caller wants us to send out.
5833  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5834  range_type inputRange (0, numExportLIDs);
5835  size_t totalNumPackets = 0;
5836  size_t errCount = 0;
5837  // lambdas turn what they capture const, so we can't
5838  // atomic_add(&errCount,1). Instead, we need a View to modify.
5839  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5840  constexpr size_t ONE = 1;
5841 
5842  if (verbose) {
5843  std::ostringstream os;
5844  os << *prefix << "Compute totalNumPackets" << endl;
5845  std::cerr << os.str ();
5846  }
5847 
5848  execute_sync_host_uvm_access(); // protect host UVM access
5849  Kokkos::parallel_reduce
5850  ("Tpetra::CrsGraph::pack: totalNumPackets",
5851  inputRange,
5852  [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
5853  const LO lclRow = exportLIDs_h[i];
5854  const GO gblRow = rowMap.getGlobalElement (lclRow);
5855  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5856  if (verbose) {
5857  std::ostringstream os;
5858  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5859  << " not in row Map on this process" << endl;
5860  std::cerr << os.str();
5861  }
5862  Kokkos::atomic_add (&errCountView(), ONE);
5863  numPacketsPerLID_h(i) = 0;
5864  }
5865  else {
5866  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5867  numPacketsPerLID_h(i) = numEnt;
5868  curTotalNumPackets += numEnt;
5869  }
5870  },
5871  totalNumPackets);
5872 
5873  if (verbose) {
5874  std::ostringstream os;
5875  os << *prefix << "totalNumPackets: " << totalNumPackets
5876  << ", errCount: " << errCount << endl;
5877  std::cerr << os.str ();
5878  }
5879  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5880  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5881  "one or more errors! totalNumPackets: " << totalNumPackets
5882  << ", errCount: " << errCount << ".");
5883 
5884  // Allocate space for all the column indices to pack.
5885  if (size_t(exports.extent (0)) < totalNumPackets) {
5886  // FIXME (mfh 09 Apr 2019) Create without initializing.
5887  exports = exports_dv_type ("exports", totalNumPackets);
5888  }
5889 
5890  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5891  (! this->supportsRowViews (), std::logic_error,
5892  "this->supportsRowViews() returns false; this should never happen. "
5893  "Please report this bug to the Tpetra developers.");
5894 
5895  // Loop again over the rows to export, and pack rows of indices
5896  // into the output buffer.
5897 
5898  if (verbose) {
5899  std::ostringstream os;
5900  os << *prefix << "Pack into exports buffer" << endl;
5901  std::cerr << os.str();
5902  }
5903 
5904  exports.clear_sync_state ();
5905  exports.modify_host ();
5906  auto exports_h = exports.view_host ();
5907 
5908  errCount = 0;
5909 
5910  // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5911  // (if locally indexed) or gblInds_wdv (if globally indexed).
5912  if(isLocallyIndexed())
5913  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5914  else if(isGloballyIndexed())
5915  gblInds_wdv.getHostView(Access::ReadOnly);
5916 
5918  Kokkos::parallel_scan
5919  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5920  inputRange, [=, &prefix]
5921  (const LO i, size_t& exportsOffset, const bool final) {
5922  const size_t curOffset = exportsOffset;
5923  const LO lclRow = exportLIDs_h(i);
5924  const GO gblRow = rowMap.getGlobalElement (lclRow);
5925  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5926  if (verbose) {
5927  std::ostringstream os;
5928  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5929  << " not in row Map on this process" << endl;
5930  std::cerr << os.str();
5931  }
5932  Kokkos::atomic_add (&errCountView(), ONE);
5933  return;
5934  }
5935 
5936  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5937  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5938  if (verbose) {
5939  std::ostringstream os;
5940  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5941  << ", gblRow=" << gblRow << ": invalid rowInfo"
5942  << endl;
5943  std::cerr << os.str();
5944  }
5945  Kokkos::atomic_add (&errCountView(), ONE);
5946  return;
5947  }
5948 
5949  if (curOffset + rowInfo.numEntries > totalNumPackets) {
5950  if (verbose) {
5951  std::ostringstream os;
5952  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5953  << ", gblRow=" << gblRow << ", curOffset (= "
5954  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5955  << ") > totalNumPackets (= " << totalNumPackets
5956  << ")." << endl;
5957  std::cerr << os.str();
5958  }
5959  Kokkos::atomic_add (&errCountView(), ONE);
5960  return;
5961  }
5962 
5963  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5964  if (this->isLocallyIndexed ()) {
5965  auto lclColInds = getLocalIndsViewHost(rowInfo);
5966  if (final) {
5967  for (LO k = 0; k < numEnt; ++k) {
5968  const LO lclColInd = lclColInds(k);
5969  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5970  // Pack it, even if it's wrong. Let the receiving
5971  // process deal with it. Otherwise, we'll miss out
5972  // on any correct data.
5973  exports_h(curOffset + k) = gblColInd;
5974  } // for each entry in the row
5975  } // final pass?
5976  exportsOffset = curOffset + numEnt;
5977  }
5978  else if (this->isGloballyIndexed ()) {
5979  auto gblColInds = getGlobalIndsViewHost(rowInfo);
5980  if (final) {
5981  for (LO k = 0; k < numEnt; ++k) {
5982  const GO gblColInd = gblColInds(k);
5983  // Pack it, even if it's wrong. Let the receiving
5984  // process deal with it. Otherwise, we'll miss out
5985  // on any correct data.
5986  exports_h(curOffset + k) = gblColInd;
5987  } // for each entry in the row
5988  } // final pass?
5989  exportsOffset = curOffset + numEnt;
5990  }
5991  // If neither globally nor locally indexed, then the graph
5992  // has no entries in this row (or indeed, in any row on this
5993  // process) to pack.
5994  });
5996 
5997  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5998  // (errCount != 0, std::logic_error, "Packing encountered "
5999  // "one or more errors! errCount = " << errCount
6000  // << ", totalNumPackets = " << totalNumPackets << ".");
6001 
6002  if (verbose) {
6003  std::ostringstream os;
6004  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6005  std::cerr << os.str();
6006  }
6007  }
6008 
6009  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6010  void
6013  (const Kokkos::DualView<const local_ordinal_type*,
6014  buffer_device_type>& importLIDs,
6015  Kokkos::DualView<packet_type*,
6016  buffer_device_type> imports,
6017  Kokkos::DualView<size_t*,
6018  buffer_device_type> numPacketsPerLID,
6019  const size_t /* constantNumPackets */,
6020  const CombineMode /* combineMode */ )
6021  {
6023  using std::endl;
6024  using LO = local_ordinal_type;
6025  using GO = global_ordinal_type;
6026  const char tfecfFuncName[] = "unpackAndCombine";
6027 
6028  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6029  const bool verbose = verbose_;
6030 
6031  std::unique_ptr<std::string> prefix;
6032  if (verbose) {
6033  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6034  std::ostringstream os;
6035  os << *prefix << "Start" << endl;
6036  std::cerr << os.str ();
6037  }
6038  {
6039  auto padding = computeCrsPaddingForImports(
6040  importLIDs, imports, numPacketsPerLID, verbose);
6041  applyCrsPadding(*padding, verbose);
6042  if (verbose) {
6043  std::ostringstream os;
6044  os << *prefix << "Done computing & applying padding" << endl;
6045  std::cerr << os.str ();
6046  }
6047  }
6048 
6049  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6050  // reasonable meaning, whether or not the matrix is fill complete.
6051  // It's just more work to implement.
6052 
6053  // We are not checking the value of the CombineMode input
6054  // argument. For CrsGraph, we only support import/export
6055  // operations if fillComplete has not yet been called. Any
6056  // incoming column-indices are inserted into the target graph. In
6057  // this context, CombineMode values of ADD vs INSERT are
6058  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6059  // duplicate column-index is inserted, it will be compressed out
6060  // when fillComplete is called.
6061  //
6062  // Note: I think REPLACE means that an existing row is replaced by
6063  // the imported row, i.e., the existing indices are cleared. CGB,
6064  // 6/17/2010
6065 
6066  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6067  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6068  std::runtime_error, ": importLIDs.extent(0) = "
6069  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6070  << numPacketsPerLID.extent (0) << ".");
6071  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6072  (isFillComplete (), std::runtime_error,
6073  ": Import or Export operations are not allowed on a target "
6074  "CrsGraph that is fillComplete.");
6075 
6076  const size_t numImportLIDs(importLIDs.extent(0));
6077  if (numPacketsPerLID.need_sync_host()) {
6078  numPacketsPerLID.sync_host();
6079  }
6080  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6081  if (imports.need_sync_host()) {
6082  imports.sync_host();
6083  }
6084  auto imports_h = imports.view_host();
6085  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6086  auto importLIDs_h = importLIDs.view_host();
6087 
6088  // If we're inserting in local indices, let's pre-allocate
6089  Teuchos::Array<LO> lclColInds;
6090  if (isLocallyIndexed()) {
6091  if (verbose) {
6092  std::ostringstream os;
6093  os << *prefix << "Preallocate local indices scratch" << endl;
6094  std::cerr << os.str();
6095  }
6096  size_t maxNumInserts = 0;
6097  for (size_t i = 0; i < numImportLIDs; ++i) {
6098  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6099  }
6100  if (verbose) {
6101  std::ostringstream os;
6102  os << *prefix << "Local indices scratch size: "
6103  << maxNumInserts << endl;
6104  std::cerr << os.str();
6105  }
6106  lclColInds.resize (maxNumInserts);
6107  }
6108  else {
6109  if (verbose) {
6110  std::ostringstream os;
6111  os << *prefix;
6112  if (isGloballyIndexed()) {
6113  os << "Graph is globally indexed";
6114  }
6115  else {
6116  os << "Graph is neither locally nor globally indexed";
6117  }
6118  os << endl;
6119  std::cerr << os.str();
6120  }
6121  }
6122 
6123  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6124  const map_type& rowMap = *rowMap_;
6125 
6126  try {
6127  size_t importsOffset = 0;
6128  for (size_t i = 0; i < numImportLIDs; ++i) {
6129  if (verbose) {
6130  std::ostringstream os;
6131  os << *prefix << "i=" << i << ", numImportLIDs="
6132  << numImportLIDs << endl;
6133  std::cerr << os.str();
6134  }
6135  // We can only unpack into owned rows, since we only have
6136  // local row indices.
6137  const LO lclRow = importLIDs_h[i];
6138  const GO gblRow = rowMap.getGlobalElement(lclRow);
6139  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6140  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6141  std::logic_error, "importLIDs[i=" << i << "]="
6142  << lclRow << " is not in the row Map on the calling "
6143  "process.");
6144  const LO numEnt = numPacketsPerLID_h[i];
6145  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6146  imports_h.data() + importsOffset;
6147  if (! isLocallyIndexed()) {
6148  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6149  }
6150  else {
6151  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6152  // column Map filtering.
6153  for (LO j = 0; j < numEnt; j++) {
6154  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6155  }
6156  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6157  }
6158  importsOffset += numEnt;
6159  }
6160  }
6161  catch (std::exception& e) {
6162  TEUCHOS_TEST_FOR_EXCEPTION
6163  (true, std::runtime_error,
6164  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6165  "exception: " << endl << e.what());
6166  }
6167 
6168  if (verbose) {
6169  std::ostringstream os;
6170  os << *prefix << "Done" << endl;
6171  std::cerr << os.str();
6172  }
6173  }
6174 
6175  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6176  void
6178  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6179  {
6180  using Teuchos::Comm;
6181  using Teuchos::null;
6182  using Teuchos::ParameterList;
6183  using Teuchos::RCP;
6184 
6185  // We'll set all the state "transactionally," so that this method
6186  // satisfies the strong exception guarantee. This object's state
6187  // won't be modified until the end of this method.
6188  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6189  RCP<import_type> importer;
6190  RCP<export_type> exporter;
6191 
6192  rowMap = newMap;
6193  RCP<const Comm<int> > newComm =
6194  (newMap.is_null ()) ? null : newMap->getComm ();
6195 
6196  if (! domainMap_.is_null ()) {
6197  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6198  // Common case: original domain and row Maps are identical.
6199  // In that case, we need only replace the original domain Map
6200  // with the new Map. This ensures that the new domain and row
6201  // Maps _stay_ identical.
6202  domainMap = newMap;
6203  } else {
6204  domainMap = domainMap_->replaceCommWithSubset (newComm);
6205  }
6206  }
6207  if (! rangeMap_.is_null ()) {
6208  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6209  // Common case: original range and row Maps are identical. In
6210  // that case, we need only replace the original range Map with
6211  // the new Map. This ensures that the new range and row Maps
6212  // _stay_ identical.
6213  rangeMap = newMap;
6214  } else {
6215  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6216  }
6217  }
6218  if (! colMap_.is_null ()) {
6219  colMap = colMap_->replaceCommWithSubset (newComm);
6220  }
6221 
6222  // (Re)create the Export and / or Import if necessary.
6223  if (! newComm.is_null ()) {
6224  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6225  //
6226  // The operations below are collective on the new communicator.
6227  //
6228  // (Re)create the Export object if necessary. If I haven't
6229  // called fillComplete yet, I don't have a rangeMap, so I must
6230  // first check if the _original_ rangeMap is not null. Ditto
6231  // for the Import object and the domain Map.
6232  if (! rangeMap_.is_null () &&
6233  rangeMap != rowMap &&
6234  ! rangeMap->isSameAs (*rowMap)) {
6235  if (params.is_null () || ! params->isSublist ("Export")) {
6236  exporter = rcp (new export_type (rowMap, rangeMap));
6237  }
6238  else {
6239  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6240  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6241  }
6242  }
6243  // (Re)create the Import object if necessary.
6244  if (! domainMap_.is_null () &&
6245  domainMap != colMap &&
6246  ! domainMap->isSameAs (*colMap)) {
6247  if (params.is_null () || ! params->isSublist ("Import")) {
6248  importer = rcp (new import_type (domainMap, colMap));
6249  } else {
6250  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6251  importer = rcp (new import_type (domainMap, colMap, importSublist));
6252  }
6253  }
6254  } // if newComm is not null
6255 
6256  // Defer side effects until the end. If no destructors throw
6257  // exceptions (they shouldn't anyway), then this method satisfies
6258  // the strong exception guarantee.
6259  exporter_ = exporter;
6260  importer_ = importer;
6261  rowMap_ = rowMap;
6262  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6263  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6264  // the same object. We might want to get rid of this redundant
6265  // pointer sometime, but for now, we'll leave it alone and just
6266  // set map_ to the same object.
6267  this->map_ = rowMap;
6268  domainMap_ = domainMap;
6269  rangeMap_ = rangeMap;
6270  colMap_ = colMap;
6271  }
6272 
6273  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6274  void
6276  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6277  {
6278  using std::endl;
6279  using LO = LocalOrdinal;
6280  using GO = GlobalOrdinal;
6281  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6282  const bool verbose = verbose_;
6283 
6284  std::unique_ptr<std::string> prefix;
6285  if (verbose) {
6286  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6287  std::ostringstream os;
6288  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6289  << endl;
6290  std::cerr << os.str();
6291  }
6292 
6293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6294  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6295  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6296  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6297  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6298  std::invalid_argument, "offsets.extent(0) = " <<
6299  offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6300 
6301  const map_type& rowMap = * (this->getRowMap ());
6302  const map_type& colMap = * (this->getColMap ());
6303 
6304  // We only use these in debug mode, but since debug mode is a
6305  // run-time option, they need to exist here. That's why we create
6306  // the vector with explicit size zero, to avoid overhead if debug
6307  // mode is off.
6308  bool allRowMapDiagEntriesInColMap = true;
6309  bool allDiagEntriesFound = true;
6310  bool allOffsetsCorrect = true;
6311  bool noOtherWeirdness = true;
6312  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6313  wrong_offsets_type wrongOffsets(0);
6314 
6315  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6316  // the subset of Map functionality that we need below.
6317  auto lclRowMap = rowMap.getLocalMap ();
6318  auto lclColMap = colMap.getLocalMap ();
6319 
6320  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6321  // setup, at least on the host. For CUDA, we have to use LocalMap
6322  // (that comes from each of the two Maps).
6323 
6324  const bool sorted = this->isSorted ();
6325  if (isFillComplete ()) {
6326  auto lclGraph = this->getLocalGraphDevice ();
6327  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6328  lclGraph.row_map,
6329  lclGraph.entries, sorted);
6330  }
6331  else {
6332  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6333  // since the graph is not fill complete. The previous version
6334  // of this code assumed UVM; this version does not.
6335  auto offsets_h = Kokkos::create_mirror_view (offsets);
6336 
6337  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6338  // Find the diagonal entry. Since the row Map and column Map
6339  // may differ, we have to compare global row and column
6340  // indices, not local.
6341  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6342  const GO gblColInd = gblRowInd;
6343  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6344 
6345  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6346  allRowMapDiagEntriesInColMap = false;
6347  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6348  }
6349  else {
6350  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6351  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6352  rowInfo.numEntries > 0) {
6353 
6354  auto colInds = this->getLocalIndsViewHost (rowInfo);
6355  const size_t hint = 0; // not needed for this algorithm
6356  const size_t offset =
6357  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6358  lclColInd, hint, sorted);
6359  offsets_h(lclRowInd) = offset;
6360 
6361  if (debug_) {
6362  // Now that we have what we think is an offset, make sure
6363  // that it really does point to the diagonal entry. Offsets
6364  // are _relative_ to each row, not absolute (for the whole
6365  // (local) graph).
6366  typename local_inds_dualv_type::t_host::const_type lclColInds;
6367  try {
6368  lclColInds = this->getLocalIndsViewHost (rowInfo);
6369  }
6370  catch (...) {
6371  noOtherWeirdness = false;
6372  }
6373  // Don't continue with error checking if the above failed.
6374  if (noOtherWeirdness) {
6375  const size_t numEnt = lclColInds.extent (0);
6376  if (offset >= numEnt) {
6377  // Offsets are relative to each row, so this means that
6378  // the offset is out of bounds.
6379  allOffsetsCorrect = false;
6380  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6381  } else {
6382  const LO actualLclColInd = lclColInds(offset);
6383  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6384  if (actualGblColInd != gblColInd) {
6385  allOffsetsCorrect = false;
6386  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6387  }
6388  }
6389  }
6390  } // debug_
6391  }
6392  else { // either row is empty, or something went wrong w/ getRowInfo()
6393  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6394  allDiagEntriesFound = false;
6395  }
6396  } // whether lclColInd is a valid local column index
6397  } // for each local row
6398  // DEEP_COPY REVIEW - NOT TESTED
6399  Kokkos::deep_copy (offsets, offsets_h);
6400  } // whether the graph is fill complete
6401 
6402  if (verbose && wrongOffsets.size () != 0) {
6403  std::ostringstream os;
6404  os << *prefix << "Wrong offsets: [";
6405  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6406  os << "(" << wrongOffsets[k].first << ","
6407  << wrongOffsets[k].second << ")";
6408  if (k + 1 < wrongOffsets.size ()) {
6409  os << ", ";
6410  }
6411  }
6412  os << "]" << endl;
6413  std::cerr << os.str();
6414  }
6415 
6416  if (debug_) {
6417  using Teuchos::reduceAll;
6418  using std::endl;
6419  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6420  const bool localSuccess =
6421  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6422  const int numResults = 5;
6423  int lclResults[5];
6424  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6425  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6426  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6427  lclResults[3] = noOtherWeirdness ? 1 : 0;
6428  // min-all-reduce will compute least rank of all the processes
6429  // that didn't succeed.
6430  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6431 
6432  int gblResults[5];
6433  gblResults[0] = 0;
6434  gblResults[1] = 0;
6435  gblResults[2] = 0;
6436  gblResults[3] = 0;
6437  gblResults[4] = 0;
6438  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6439  numResults, lclResults, gblResults);
6440 
6441  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6442  || gblResults[3] != 1) {
6443  std::ostringstream os; // build error message
6444  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6445  "possibly among others): " << endl;
6446  if (gblResults[0] == 0) {
6447  os << " - The column Map does not contain at least one diagonal entry "
6448  "of the graph." << endl;
6449  }
6450  if (gblResults[1] == 0) {
6451  os << " - On one or more processes, some row does not contain a "
6452  "diagonal entry." << endl;
6453  }
6454  if (gblResults[2] == 0) {
6455  os << " - On one or more processes, some offsets are incorrect."
6456  << endl;
6457  }
6458  if (gblResults[3] == 0) {
6459  os << " - One or more processes had some other error."
6460  << endl;
6461  }
6462  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6463  }
6464  } // debug_
6465  }
6466 
6467  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6468  void
6470  getLocalOffRankOffsets (offset_device_view_type& offsets) const
6471  {
6472  using std::endl;
6473  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6474  const bool verbose = verbose_;
6475 
6476  std::unique_ptr<std::string> prefix;
6477  if (verbose) {
6478  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6479  std::ostringstream os;
6480  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6481  << endl;
6482  std::cerr << os.str();
6483  }
6484 
6485  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6486  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6487  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6488 
6489  const size_t lclNumRows = this->getLocalNumRows ();
6490 
6491  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6492  offsets = k_offRankOffsets_;
6493  return;
6494  }
6495  haveLocalOffRankOffsets_ = false;
6496 
6497  const map_type& colMap = * (this->getColMap ());
6498  const map_type& domMap = * (this->getDomainMap ());
6499 
6500  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6501  // the subset of Map functionality that we need below.
6502  auto lclColMap = colMap.getLocalMap ();
6503  auto lclDomMap = domMap.getLocalMap ();
6504 
6505  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6506  // setup, at least on the host. For CUDA, we have to use LocalMap
6507  // (that comes from each of the two Maps).
6508 
6509  TEUCHOS_ASSERT(this->isSorted ());
6510  if (isFillComplete ()) {
6511  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6512  auto lclGraph = this->getLocalGraphDevice ();
6513  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6514  lclColMap, lclDomMap,
6515  lclGraph);
6516  offsets = k_offRankOffsets_;
6517  haveLocalOffRankOffsets_ = true;
6518  } else {
6519  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6520  (true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6521  }
6522  }
6523 
6524  namespace { // (anonymous)
6525 
6526  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6527  // below). The point is to avoid the deep copy between the input
6528  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6529  // can't use UVM to avoid the deep copy with CUDA, because the
6530  // ArrayRCP is a host pointer, while the input to the graph's
6531  // getLocalDiagOffsets method is a device pointer. Assigning a
6532  // host pointer to a device pointer is incorrect unless the host
6533  // pointer points to host pinned memory. The goal is to get rid
6534  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6535  // copy for backwards compatibility.
6536  //
6537  // We have to use template magic because
6538  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6539  // if device_type::memory_space is not Kokkos::HostSpace (as is
6540  // the case with CUDA).
6541 
6542  template<class DeviceType,
6543  const bool memSpaceIsHostSpace =
6544  std::is_same<typename DeviceType::memory_space,
6545  Kokkos::HostSpace>::value>
6546  struct HelpGetLocalDiagOffsets {};
6547 
6548  template<class DeviceType>
6549  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6550  typedef DeviceType device_type;
6551  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6552  Kokkos::MemoryUnmanaged> device_offsets_type;
6553  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6554  Kokkos::MemoryUnmanaged> host_offsets_type;
6555 
6556  static device_offsets_type
6557  getDeviceOffsets (const host_offsets_type& hostOffsets)
6558  {
6559  // Host and device are the same; no need to allocate a
6560  // temporary device View.
6561  return hostOffsets;
6562  }
6563 
6564  static void
6565  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6566  const device_offsets_type& /* deviceOffsets */)
6567  { /* copy back not needed; host and device are the same */ }
6568  };
6569 
6570  template<class DeviceType>
6571  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6572  typedef DeviceType device_type;
6573  // We have to do a deep copy, since host memory space != device
6574  // memory space. Thus, the device View is managed (we need to
6575  // allocate a temporary device View).
6576  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6577  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6578  Kokkos::MemoryUnmanaged> host_offsets_type;
6579 
6580  static device_offsets_type
6581  getDeviceOffsets (const host_offsets_type& hostOffsets)
6582  {
6583  // Host memory space != device memory space, so we must
6584  // allocate a temporary device View for the graph.
6585  return device_offsets_type ("offsets", hostOffsets.extent (0));
6586  }
6587 
6588  static void
6589  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6590  const device_offsets_type& deviceOffsets)
6591  {
6592  // DEEP_COPY REVIEW - NOT TESTED
6593  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6594  }
6595  };
6596  } // namespace (anonymous)
6597 
6598 
6599  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6600  void
6602  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6603  {
6604  typedef LocalOrdinal LO;
6605  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6606  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6607  (! this->hasColMap (), std::runtime_error,
6608  "The graph does not yet have a column Map.");
6609  const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6610  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6611  // NOTE (mfh 21 Jan 2016) This means that the method does not
6612  // satisfy the strong exception guarantee (no side effects
6613  // unless successful).
6614  offsets.resize (myNumRows);
6615  }
6616 
6617  // mfh 21 Jan 2016: This method unfortunately takes a
6618  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6619  // device pointer. We can't access host memory from the device;
6620  // that's the wrong direction for UVM. (It's the right direction
6621  // for inefficient host pinned memory, but we don't want to use
6622  // that here.) Thus, if device memory space != host memory space,
6623  // we allocate and use a temporary device View to get the offsets.
6624  // If the two spaces are equal, the template magic makes the deep
6625  // copy go away.
6626  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6627  typedef typename helper_type::host_offsets_type host_offsets_type;
6628  // Unmanaged host View that views the output array.
6629  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6630  // Allocate temp device View if host != device, else reuse host array.
6631  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6632  // NOT recursion; this calls the overload that takes a device View.
6633  this->getLocalDiagOffsets (deviceOffsets);
6634  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6635  }
6636 
6637  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6638  bool
6641  return true;
6642  }
6643 
6644  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6645  void
6648  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6649  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6650  const Teuchos::RCP<const map_type>& domainMap,
6651  const Teuchos::RCP<const map_type>& rangeMap,
6652  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6653  {
6658  using Teuchos::ArrayRCP;
6659  using Teuchos::ArrayView;
6660  using Teuchos::Comm;
6661  using Teuchos::ParameterList;
6662  using Teuchos::rcp;
6663  using Teuchos::RCP;
6664 #ifdef HAVE_TPETRA_MMM_TIMINGS
6665  using std::string;
6666  using Teuchos::TimeMonitor;
6667 #endif
6668 
6669  using LO = LocalOrdinal;
6670  using GO = GlobalOrdinal;
6671  using NT = node_type;
6672  using this_CRS_type = CrsGraph<LO, GO, NT>;
6673  using ivector_type = Vector<int, LO, GO, NT>;
6674 
6675  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6676 
6677 #ifdef HAVE_TPETRA_MMM_TIMINGS
6678  string label;
6679  if(!params.is_null()) label = params->get("Timer Label", label);
6680  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6681  RCP<TimeMonitor> MM =
6682  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6683 #endif
6684 
6685  // Make sure that the input argument rowTransfer is either an
6686  // Import or an Export. Import and Export are the only two
6687  // subclasses of Transfer that we defined, but users might
6688  // (unwisely, for now at least) decide to implement their own
6689  // subclasses. Exclude this possibility.
6690  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6691  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6692  TEUCHOS_TEST_FOR_EXCEPTION(
6693  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6694  prefix << "The 'rowTransfer' input argument must be either an Import or "
6695  "an Export, and its template parameters must match the corresponding "
6696  "template parameters of the CrsGraph.");
6697 
6698  // Make sure that the input argument domainTransfer is either an
6699  // Import or an Export. Import and Export are the only two
6700  // subclasses of Transfer that we defined, but users might
6701  // (unwisely, for now at least) decide to implement their own
6702  // subclasses. Exclude this possibility.
6703  Teuchos::RCP<const import_type> xferDomainAsImport =
6704  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6705  Teuchos::RCP<const export_type> xferDomainAsExport =
6706  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6707 
6708  if(! domainTransfer.is_null()) {
6709 
6710  TEUCHOS_TEST_FOR_EXCEPTION(
6711  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6712  prefix << "The 'domainTransfer' input argument must be either an "
6713  "Import or an Export, and its template parameters must match the "
6714  "corresponding template parameters of the CrsGraph.");
6715 
6716  TEUCHOS_TEST_FOR_EXCEPTION(
6717  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6718  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6719  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6720  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6721  "must be of the same type (either Import or Export).");
6722 
6723  TEUCHOS_TEST_FOR_EXCEPTION(
6724  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6725  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6726  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6727  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6728  "must be of the same type (either Import or Export).");
6729 
6730  } // domainTransfer != null
6731 
6732 
6733  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6734  // if the source Map is not distributed but the target Map is?
6735  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6736 
6737  //
6738  // Get the caller's parameters
6739  //
6740 
6741  bool reverseMode = false; // Are we in reverse mode?
6742  bool restrictComm = false; // Do we need to restrict the communicator?
6743  RCP<ParameterList> graphparams; // parameters for the destination graph
6744  if (! params.is_null()) {
6745  reverseMode = params->get("Reverse Mode", reverseMode);
6746  restrictComm = params->get("Restrict Communicator", restrictComm);
6747  graphparams = sublist(params, "CrsGraph");
6748  }
6749 
6750  // Get the new domain and range Maps. We need some of them for error
6751  // checking, now that we have the reverseMode parameter.
6752  RCP<const map_type> MyRowMap = reverseMode ?
6753  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6754  RCP<const map_type> MyColMap; // create this below
6755  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6756  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6757  RCP<const map_type> BaseRowMap = MyRowMap;
6758  RCP<const map_type> BaseDomainMap = MyDomainMap;
6759 
6760  // If the user gave us a nonnull destGraph, then check whether it's
6761  // "pristine." That means that it has no entries.
6762  //
6763  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6764  // then this exception test may hang. It would be better to
6765  // forward an error flag to the next communication phase.
6766  if (! destGraph.is_null()) {
6767  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6768  // whether a graph or matrix has no entries on the calling
6769  // process, is that it is neither locally nor globally indexed.
6770  // This may change eventually with the Kokkos refactor version
6771  // of Tpetra, so it would be better just to check the quantity
6772  // of interest directly. Note that with the Kokkos refactor
6773  // version of Tpetra, asking for the total number of entries in
6774  // a graph or matrix that is not fill complete might require
6775  // computation (kernel launch), since it is not thread scalable
6776  // to update a count every time an entry is inserted.
6777  const bool NewFlag =
6778  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6779  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6780  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6781  "if its graph is empty (neither locally nor globally indexed).");
6782 
6783  // FIXME (mfh 15 May 2014) At some point, we want to change
6784  // graphs and matrices so that their DistObject Map
6785  // (this->getMap()) may differ from their row Map. This will
6786  // make redistribution for 2-D distributions more efficient. I
6787  // hesitate to change this check, because I'm not sure how much
6788  // the code here depends on getMap() and getRowMap() being the
6789  // same.
6790  TEUCHOS_TEST_FOR_EXCEPTION(
6791  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6792  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6793  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6794 
6795  TEUCHOS_TEST_FOR_EXCEPTION(
6796  ! destGraph->checkSizes(*this), std::invalid_argument,
6797  prefix << "You provided a nonnull destination graph, but checkSizes() "
6798  "indicates that it is not a legal legal target for redistribution from "
6799  "the source graph (*this). This may mean that they do not have the "
6800  "same dimensions.");
6801  }
6802 
6803  // If forward mode (the default), then *this's (row) Map must be
6804  // the same as the source Map of the Transfer. If reverse mode,
6805  // then *this's (row) Map must be the same as the target Map of
6806  // the Transfer.
6807  //
6808  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6809  // and matrices so that their DistObject Map (this->getMap()) may
6810  // differ from their row Map. This will make redistribution for
6811  // 2-D distributions more efficient. I hesitate to change this
6812  // check, because I'm not sure how much the code here depends on
6813  // getMap() and getRowMap() being the same.
6814  TEUCHOS_TEST_FOR_EXCEPTION(
6815  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6816  std::invalid_argument, prefix <<
6817  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6818 
6819  TEUCHOS_TEST_FOR_EXCEPTION(
6820  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6821  std::invalid_argument, prefix <<
6822  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6823 
6824  // checks for domainTransfer
6825  TEUCHOS_TEST_FOR_EXCEPTION(
6826  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6827  std::invalid_argument,
6828  prefix << "The target map of the 'domainTransfer' input argument must be "
6829  "the same as the rebalanced domain map 'domainMap'");
6830 
6831  TEUCHOS_TEST_FOR_EXCEPTION(
6832  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6833  std::invalid_argument,
6834  prefix << "The source map of the 'domainTransfer' input argument must be "
6835  "the same as the rebalanced domain map 'domainMap'");
6836 
6837  // The basic algorithm here is:
6838  //
6839  // 1. Call the moral equivalent of "Distor.do" to handle the import.
6840  // 2. Copy all the Imported and Copy/Permuted data into the raw
6841  // CrsGraph pointers, still using GIDs.
6842  // 3. Call an optimized version of MakeColMap that avoids the
6843  // Directory lookups (since the importer knows who owns all the
6844  // GIDs) AND reindexes to LIDs.
6845  // 4. Call expertStaticFillComplete()
6846 
6847  // Get information from the Importer
6848  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6849  ArrayView<const LO> ExportLIDs = reverseMode ?
6850  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6851  ArrayView<const LO> RemoteLIDs = reverseMode ?
6852  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6853  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6854  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6855  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6856  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6857  Distributor& Distor = rowTransfer.getDistributor();
6858 
6859  // Owning PIDs
6860  Teuchos::Array<int> SourcePids;
6861  Teuchos::Array<int> TargetPids;
6862  int MyPID = getComm()->getRank();
6863 
6864  // Temp variables for sub-communicators
6865  RCP<const map_type> ReducedRowMap, ReducedColMap,
6866  ReducedDomainMap, ReducedRangeMap;
6867  RCP<const Comm<int> > ReducedComm;
6868 
6869  // If the user gave us a null destGraph, then construct the new
6870  // destination graph. We will replace its column Map later.
6871  if (destGraph.is_null()) {
6872  destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6873  }
6874 
6875  /***************************************************/
6876  /***** 1) First communicator restriction phase ****/
6877  /***************************************************/
6878  if (restrictComm) {
6879  ReducedRowMap = MyRowMap->removeEmptyProcesses();
6880  ReducedComm = ReducedRowMap.is_null() ?
6881  Teuchos::null :
6882  ReducedRowMap->getComm();
6883  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6884 
6885  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6886  ReducedRowMap :
6887  MyDomainMap->replaceCommWithSubset(ReducedComm);
6888  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6889  ReducedRowMap :
6890  MyRangeMap->replaceCommWithSubset(ReducedComm);
6891 
6892  // Reset the "my" maps
6893  MyRowMap = ReducedRowMap;
6894  MyDomainMap = ReducedDomainMap;
6895  MyRangeMap = ReducedRangeMap;
6896 
6897  // Update my PID, if we've restricted the communicator
6898  if (! ReducedComm.is_null()) {
6899  MyPID = ReducedComm->getRank();
6900  }
6901  else {
6902  MyPID = -2; // For debugging
6903  }
6904  }
6905  else {
6906  ReducedComm = MyRowMap->getComm();
6907  }
6908 
6909  /***************************************************/
6910  /***** 2) From Tpera::DistObject::doTransfer() ****/
6911  /***************************************************/
6912 #ifdef HAVE_TPETRA_MMM_TIMINGS
6913  MM = Teuchos::null;
6914  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6915 #endif
6916  // Get the owning PIDs
6917  RCP<const import_type> MyImporter = getImporter();
6918 
6919  // check whether domain maps of source graph and base domain map is the same
6920  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6921 
6922  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6923  // Same domain map as source graph
6924  //
6925  // NOTE: This won't work for restrictComm (because the Import
6926  // doesn't know the restricted PIDs), though writing an
6927  // optimized version for that case would be easy (Import an
6928  // IntVector of the new PIDs). Might want to add this later.
6929  Import_Util::getPids(*MyImporter, SourcePids, false);
6930  }
6931  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6932  // Same domain map as source graph (restricted communicator)
6933  // We need one import from the domain to the column map
6934  ivector_type SourceDomain_pids(getDomainMap(),true);
6935  ivector_type SourceCol_pids(getColMap());
6936  // SourceDomain_pids contains the restricted pids
6937  SourceDomain_pids.putScalar(MyPID);
6938 
6939  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6940  SourcePids.resize(getColMap()->getLocalNumElements());
6941  SourceCol_pids.get1dCopy(SourcePids());
6942  }
6943  else if (MyImporter.is_null() && bSameDomainMap) {
6944  // Graph has no off-process entries
6945  SourcePids.resize(getColMap()->getLocalNumElements());
6946  SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6947  }
6948  else if ( ! MyImporter.is_null() &&
6949  ! domainTransfer.is_null() ) {
6950  // general implementation for rectangular matrices with
6951  // domain map different than SourceGraph domain map.
6952  // User has to provide a DomainTransfer object. We need
6953  // to communications (import/export)
6954 
6955  // TargetDomain_pids lives on the rebalanced new domain map
6956  ivector_type TargetDomain_pids(domainMap);
6957  TargetDomain_pids.putScalar(MyPID);
6958 
6959  // SourceDomain_pids lives on the non-rebalanced old domain map
6960  ivector_type SourceDomain_pids(getDomainMap());
6961 
6962  // SourceCol_pids lives on the non-rebalanced old column map
6963  ivector_type SourceCol_pids(getColMap());
6964 
6965  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6966  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6967  }
6968  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6969  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6970  }
6971  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6972  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6973  }
6974  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6975  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6976  }
6977  else {
6978  TEUCHOS_TEST_FOR_EXCEPTION(
6979  true, std::logic_error,
6980  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6981  }
6982  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6983  SourcePids.resize(getColMap()->getLocalNumElements());
6984  SourceCol_pids.get1dCopy(SourcePids());
6985  }
6986  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6987  getDomainMap()->isSameAs(*getRowMap())) {
6988  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6989  ivector_type TargetRow_pids(domainMap);
6990  ivector_type SourceRow_pids(getRowMap());
6991  ivector_type SourceCol_pids(getColMap());
6992 
6993  TargetRow_pids.putScalar(MyPID);
6994  if (! reverseMode && xferAsImport != nullptr) {
6995  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6996  }
6997  else if (reverseMode && xferAsExport != nullptr) {
6998  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6999  }
7000  else if (! reverseMode && xferAsExport != nullptr) {
7001  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7002  }
7003  else if (reverseMode && xferAsImport != nullptr) {
7004  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7005  }
7006  else {
7007  TEUCHOS_TEST_FOR_EXCEPTION(
7008  true, std::logic_error,
7009  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7010  }
7011  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7012  SourcePids.resize(getColMap()->getLocalNumElements());
7013  SourceCol_pids.get1dCopy(SourcePids());
7014  }
7015  else {
7016  TEUCHOS_TEST_FOR_EXCEPTION(
7017  true, std::invalid_argument,
7018  prefix << "This method only allows either domainMap == getDomainMap(), "
7019  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7020  }
7021 
7022  // Tpetra-specific stuff
7023  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7024  if (constantNumPackets == 0) {
7025  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7026  RemoteLIDs.size());
7027  }
7028  else {
7029  // There are a constant number of packets per element. We
7030  // already know (from the number of "remote" (incoming)
7031  // elements) how many incoming elements we expect, so we can
7032  // resize the buffer accordingly.
7033  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7034  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7035  }
7036 
7037  {
7038  // packAndPrepare* methods modify numExportPacketsPerLID_.
7039  destGraph->numExportPacketsPerLID_.modify_host();
7040  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7041  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7042 
7043  // Pack & Prepare w/ owning PIDs
7044  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7045  numExportPacketsPerLID, ExportLIDs,
7046  SourcePids, constantNumPackets);
7047  }
7048 
7049  // Do the exchange of remote data.
7050 #ifdef HAVE_TPETRA_MMM_TIMINGS
7051  MM = Teuchos::null;
7052  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7053 #endif
7054 
7055  if (communication_needed) {
7056  if (reverseMode) {
7057  if (constantNumPackets == 0) { // variable number of packets per LID
7058  // Make sure that host has the latest version, since we're
7059  // using the version on host. If host has the latest
7060  // version, syncing to host does nothing.
7061  destGraph->numExportPacketsPerLID_.sync_host();
7062  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7063  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7064  destGraph->numImportPacketsPerLID_.sync_host();
7065  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7066  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7067 
7068  Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7069  destGraph->numImportPacketsPerLID_.view_host());
7070  size_t totalImportPackets = 0;
7071  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7072  totalImportPackets += numImportPacketsPerLID[i];
7073  }
7074 
7075  // Reallocation MUST go before setting the modified flag,
7076  // because it may clear out the flags.
7077  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7078  destGraph->imports_.modify_host();
7079  auto hostImports = destGraph->imports_.view_host();
7080  // This is a legacy host pack/unpack path, so use the host
7081  // version of exports_.
7082  destGraph->exports_.sync_host();
7083  auto hostExports = destGraph->exports_.view_host();
7084  Distor.doReversePostsAndWaits(hostExports,
7085  numExportPacketsPerLID,
7086  hostImports,
7087  numImportPacketsPerLID);
7088  }
7089  else { // constant number of packets per LI
7090  destGraph->imports_.modify_host();
7091  auto hostImports = destGraph->imports_.view_host();
7092  // This is a legacy host pack/unpack path, so use the host
7093  // version of exports_.
7094  destGraph->exports_.sync_host();
7095  auto hostExports = destGraph->exports_.view_host();
7096  Distor.doReversePostsAndWaits(hostExports,
7097  constantNumPackets,
7098  hostImports);
7099  }
7100  }
7101  else { // forward mode (the default)
7102  if (constantNumPackets == 0) { // variable number of packets per LID
7103  // Make sure that host has the latest version, since we're
7104  // using the version on host. If host has the latest
7105  // version, syncing to host does nothing.
7106  destGraph->numExportPacketsPerLID_.sync_host();
7107  destGraph->numImportPacketsPerLID_.sync_host();
7108  Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7109  destGraph->numImportPacketsPerLID_.view_host());
7110 
7111  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7112  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7113  size_t totalImportPackets = 0;
7114  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7115  totalImportPackets += numImportPacketsPerLID[i];
7116  }
7117 
7118  // Reallocation MUST go before setting the modified flag,
7119  // because it may clear out the flags.
7120  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7121  destGraph->imports_.modify_host();
7122  auto hostImports = destGraph->imports_.view_host();
7123  // This is a legacy host pack/unpack path, so use the host
7124  // version of exports_.
7125  destGraph->exports_.sync_host();
7126  auto hostExports = destGraph->exports_.view_host();
7127  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7128  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7129  Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7130  }
7131  else { // constant number of packets per LID
7132  destGraph->imports_.modify_host();
7133  auto hostImports = destGraph->imports_.view_host();
7134  // This is a legacy host pack/unpack path, so use the host
7135  // version of exports_.
7136  destGraph->exports_.sync_host();
7137  auto hostExports = destGraph->exports_.view_host();
7138  Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7139  }
7140  }
7141  }
7142 
7143  /*********************************************************************/
7144  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7145  /*********************************************************************/
7146 
7147 #ifdef HAVE_TPETRA_MMM_TIMINGS
7148  MM = Teuchos::null;
7149  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7150 #endif
7151 
7152  // Backwards compatibility measure. We'll use this again below.
7153  destGraph->numImportPacketsPerLID_.sync_host();
7154  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7155  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7156  destGraph->imports_.sync_host();
7157  Teuchos::ArrayView<const packet_type> hostImports =
7158  getArrayViewFromDualView(destGraph->imports_);
7159  size_t mynnz =
7160  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7161  numImportPacketsPerLID,
7162  constantNumPackets, INSERT,
7163  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7164  size_t N = BaseRowMap->getLocalNumElements();
7165 
7166  // Allocations
7167  ArrayRCP<size_t> CSR_rowptr(N+1);
7168  ArrayRCP<GO> CSR_colind_GID;
7169  ArrayRCP<LO> CSR_colind_LID;
7170  CSR_colind_GID.resize(mynnz);
7171 
7172  // If LO and GO are the same, we can reuse memory when
7173  // converting the column indices from global to local indices.
7174  if (typeid(LO) == typeid(GO)) {
7175  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7176  }
7177  else {
7178  CSR_colind_LID.resize(mynnz);
7179  }
7180 
7181  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7182  // unpackAndCombine method on a "CrsArrays" object? This passing
7183  // in a huge list of arrays is icky. Can't we have a bit of an
7184  // abstraction? Implementing a concrete DistObject subclass only
7185  // takes five methods.
7186  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7187  numImportPacketsPerLID, constantNumPackets,
7188  INSERT, NumSameIDs, PermuteToLIDs,
7189  PermuteFromLIDs, N, mynnz, MyPID,
7190  CSR_rowptr(), CSR_colind_GID(),
7191  SourcePids(), TargetPids);
7192 
7193  /**************************************************************/
7194  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7195  /**************************************************************/
7196 #ifdef HAVE_TPETRA_MMM_TIMINGS
7197  MM = Teuchos::null;
7198  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7199 #endif
7200  // Call an optimized version of makeColMap that avoids the
7201  // Directory lookups (since the Import object knows who owns all
7202  // the GIDs).
7203  Teuchos::Array<int> RemotePids;
7204  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7205  CSR_colind_LID(),
7206  CSR_colind_GID(),
7207  BaseDomainMap,
7208  TargetPids, RemotePids,
7209  MyColMap);
7210 
7211  /*******************************************************/
7212  /**** 4) Second communicator restriction phase ****/
7213  /*******************************************************/
7214  if (restrictComm) {
7215  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7216  ReducedRowMap :
7217  MyColMap->replaceCommWithSubset(ReducedComm);
7218  MyColMap = ReducedColMap; // Reset the "my" maps
7219  }
7220 
7221  // Replace the col map
7222  destGraph->replaceColMap(MyColMap);
7223 
7224  // Short circuit if the processor is no longer in the communicator
7225  //
7226  // NOTE: Epetra replaces modifies all "removed" processes so they
7227  // have a dummy (serial) Map that doesn't touch the original
7228  // communicator. Duplicating that here might be a good idea.
7229  if (ReducedComm.is_null()) {
7230  return;
7231  }
7232 
7233  /***************************************************/
7234  /**** 5) Sort ****/
7235  /***************************************************/
7236  if ((! reverseMode && xferAsImport != nullptr) ||
7237  (reverseMode && xferAsExport != nullptr)) {
7238  Import_Util::sortCrsEntries(CSR_rowptr(),
7239  CSR_colind_LID());
7240  }
7241  else if ((! reverseMode && xferAsExport != nullptr) ||
7242  (reverseMode && xferAsImport != nullptr)) {
7243  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7244  CSR_colind_LID());
7245  if (CSR_rowptr[N] != mynnz) {
7246  CSR_colind_LID.resize(CSR_rowptr[N]);
7247  }
7248  }
7249  else {
7250  TEUCHOS_TEST_FOR_EXCEPTION(
7251  true, std::logic_error,
7252  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7253  }
7254  /***************************************************/
7255  /**** 6) Reset the colmap and the arrays ****/
7256  /***************************************************/
7257 
7258  // Call constructor for the new graph (restricted as needed)
7259  //
7260  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7261 
7262  /***************************************************/
7263  /**** 7) Build Importer & Call ESFC ****/
7264  /***************************************************/
7265  // Pre-build the importer using the existing PIDs
7266  Teuchos::ParameterList esfc_params;
7267 #ifdef HAVE_TPETRA_MMM_TIMINGS
7268  MM = Teuchos::null;
7269  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7270 #endif
7271  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7272 #ifdef HAVE_TPETRA_MMM_TIMINGS
7273  MM = Teuchos::null;
7274  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7275 
7276  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7277 #endif
7278  if(!params.is_null())
7279  esfc_params.set("compute global constants",params->get("compute global constants",true));
7280 
7281  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7282  MyImport, Teuchos::null, rcp(&esfc_params,false));
7283 
7284  }
7285 
7286  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7287  void
7290  const import_type& importer,
7291  const Teuchos::RCP<const map_type>& domainMap,
7292  const Teuchos::RCP<const map_type>& rangeMap,
7293  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7294  {
7295  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7296  }
7297 
7298  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7299  void
7302  const import_type& rowImporter,
7303  const import_type& domainImporter,
7304  const Teuchos::RCP<const map_type>& domainMap,
7305  const Teuchos::RCP<const map_type>& rangeMap,
7306  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7307  {
7308  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7309  }
7310 
7311  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7312  void
7315  const export_type& exporter,
7316  const Teuchos::RCP<const map_type>& domainMap,
7317  const Teuchos::RCP<const map_type>& rangeMap,
7318  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7319  {
7320  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7321  }
7322 
7323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7324  void
7327  const export_type& rowExporter,
7328  const export_type& domainExporter,
7329  const Teuchos::RCP<const map_type>& domainMap,
7330  const Teuchos::RCP<const map_type>& rangeMap,
7331  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7332  {
7333  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7334  }
7335 
7336 
7337  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7338  void
7341  {
7342  std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7343 
7344  std::swap(graph.rowMap_, this->rowMap_);
7345  std::swap(graph.colMap_, this->colMap_);
7346  std::swap(graph.rangeMap_, this->rangeMap_);
7347  std::swap(graph.domainMap_, this->domainMap_);
7348 
7349  std::swap(graph.importer_, this->importer_);
7350  std::swap(graph.exporter_, this->exporter_);
7351 
7352  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7353 
7354  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7355  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7356 
7357  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7358 
7359  std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7360  std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7361 
7362  std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7363  std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7364  std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
7365 
7366  std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7367 
7368  std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7369  std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7370  std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7371 
7372  std::swap(graph.storageStatus_, this->storageStatus_);
7373 
7374  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7375  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7376  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7377  std::swap(graph.fillComplete_, this->fillComplete_);
7378  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7379  std::swap(graph.noRedundancies_, this->noRedundancies_);
7380  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7381  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7382  std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7383 
7384  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7385 
7386  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7387  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7388  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7389  }
7390 
7391 
7392  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7393  bool
7396  {
7397  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7398  bool output = true;
7399  output = m1.size() == m2.size() ? output : false;
7400  for(auto & it_m: m1)
7401  {
7402  size_t key = it_m.first;
7403  output = m2.find(key) != m2.end() ? output : false;
7404  if(output)
7405  {
7406  auto v1 = m1.find(key)->second;
7407  auto v2 = m2.find(key)->second;
7408  std::sort(v1.begin(), v1.end());
7409  std::sort(v2.begin(), v2.end());
7410 
7411  output = v1.size() == v2.size() ? output : false;
7412  for(size_t i=0; output && i<v1.size(); i++)
7413  {
7414  output = v1[i]==v2[i] ? output : false;
7415  }
7416  }
7417  }
7418  return output;
7419  };
7420 
7421  bool output = true;
7422 
7423  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7424  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7425  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7426  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7427 
7428  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7429 
7430  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7431  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7432 
7433  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7434 
7435  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7436 
7437  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7438  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7439  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7440  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7441  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7442  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7443  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7444  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7445  output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7446  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7447 
7448  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7449  // nonlocals_ isa std::map<GO, std::vector<GO> >
7450  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7451 
7452  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7453  // - since this is a HostMirror type, it should be in host memory already
7454  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7455  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7456  {
7457  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7458  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7459  }
7460 
7461  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7462  // - since this is a HostMirror type, it should be in host memory already
7463  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7464  if(output && this->k_numRowEntries_.extent(0) > 0)
7465  {
7466  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7467  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7468  }
7469 
7470  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7471  {
7472  auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7473  auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7474  output = rowPtrsThis .extent(0) == rowPtrsGraph.extent(0) ? output : false;
7475  for(size_t i=0; output && i< rowPtrsThis.extent(0); i++)
7476  output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7477  }
7478 
7479  // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7480  output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7481  if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7482  {
7483  auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7484  auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7485  for(size_t i=0; output && i < indThis.extent(0); i++)
7486  output = indThis(i) == indGraph(i) ? output : false;
7487  }
7488 
7489  // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7490  output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7491  if(output && this->gblInds_wdv.extent(0) > 0)
7492  {
7493  auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7494  auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7495  for(size_t i=0; output && i<indtThis.extent(0); i++)
7496  output = indtThis(i) == indtGraph(i) ? output : false;
7497  }
7498 
7499  // Check lclGraph_ isa
7500  // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7501  // Kokkos::StaticCrsGraph has 3 data members in it:
7502  // Kokkos::View<size_type*, ...> row_map
7503  // (local_graph_device_type::row_map_type)
7504  // Kokkos::View<data_type*, ...> entries
7505  // (local_graph_device_type::entries_type)
7506  // Kokkos::View<size_type*, ...> row_block_offsets
7507  // (local_graph_device_type::row_block_type)
7508  // There is currently no Kokkos::StaticCrsGraph comparison function
7509  // that's built-in, so we will just compare
7510  // the three data items here. This can be replaced if Kokkos ever
7511  // puts in its own comparison routine.
7512  local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7513  local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7514 
7515  output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7516  ? output : false;
7517  if(output && thisLclGraph.row_map.extent(0) > 0)
7518  {
7519  auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7520  auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7521  for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7522  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7523  ? output : false;
7524  }
7525 
7526  output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7527  ? output : false;
7528  if(output && thisLclGraph.entries.extent(0) > 0)
7529  {
7530  auto lclGraph_entries_host_this = thisLclGraph.entries;
7531  auto lclGraph_entries_host_graph = graphLclGraph.entries;
7532  for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7533  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7534  ? output : false;
7535  }
7536 
7537  output =
7538  thisLclGraph.row_block_offsets.extent(0) ==
7539  graphLclGraph.row_block_offsets.extent(0) ? output : false;
7540  if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7541  {
7542  auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7543  auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7544  for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7545  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7546  ? output : false;
7547  }
7548 
7549  // For Importer and Exporter, we don't need to explicitly check them since
7550  // they will be consistent with the maps.
7551  // Note: importer_ isa Teuchos::RCP<const import_type>
7552  // exporter_ isa Teuchos::RCP<const export_type>
7553 
7554  return output;
7555  }
7556 
7557 
7558 
7559 } // namespace Tpetra
7560 
7561 //
7562 // Explicit instantiation macros
7563 //
7564 // Must be expanded from within the Tpetra namespace!
7565 //
7566 
7567 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7568  template<> \
7569  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7570  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7571  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7572  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7573  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7574  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7575  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7576  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7577  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7578  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7579  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7580  const Teuchos::RCP<Teuchos::ParameterList>& params);
7581 
7582 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7583  template<> \
7584  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7585  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7586  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7587  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7588  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7589  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7590  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7591  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7592  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7593  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7594  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7595  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7596  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7597  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7598  const Teuchos::RCP<Teuchos::ParameterList>& params);
7599 
7600 
7601 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7602  template<> \
7603  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7604  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7605  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7606  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7607  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7608  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7609  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7610  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7611  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7612  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7613  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7614  const Teuchos::RCP<Teuchos::ParameterList>& params);
7615 
7616 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7617  template<> \
7618  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7619  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7620  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7621  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7622  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7623  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7624  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7625  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7626  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7627  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7628  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7629  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7630  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7631  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7632  const Teuchos::RCP<Teuchos::ParameterList>& params);
7633 
7634 
7635 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7636  template class CrsGraph<LO, GO, NODE>; \
7637  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7638  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7639  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7640  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7641 
7642 
7643 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph&#39;s data directly, using 1-D storage.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph&#39;s indices are sorted in each row, on this process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indicies.
GlobalOrdinal global_ordinal_type
The type of the graph&#39;s global indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
An abstract interface for graphs accessed by rows.
Kokkos::View< const size_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Declaration of a function that prints strings from each process.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Create a cloned CrsGraph for a different Node type.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph&#39;s communicator...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph&#39;s indices are non-redundant (merged) in each row, on this process.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
const row_ptrs_host_view_type & getRowPtrsPackedHost() const
Get the packed row pointers on host. Lazily make a copy from device.
static bool debug()
Whether Tpetra is in debug mode.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph&#39;s column indices are stored as global indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
int local_ordinal_type
Default value of Scalar template parameter.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
std::string description() const override
Return a one-line human-readable description of this object.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph&#39;s column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
size_t global_size_t
Global size_t object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class&#39; Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don&#39;t currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indicies.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph&#39;s global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void checkInternalState() const
Throw an exception if the internal state is not consistent.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
typename device_type::execution_space execution_space
This class&#39; Kokkos execution space.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
Utility functions for packing and unpacking sparse matrix entries.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Abstract base class for objects that can be the source of an Import or Export operation.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
LocalOrdinal local_ordinal_type
The type of the graph&#39;s local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for &quot;new&quot; DistObject interf...
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph&#39;s current column Map with the given Map.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true Built during fillCo...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Details::EStorageStatus storageStatus_
Status of the graph&#39;s storage, when not in a fill-complete state.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
const row_ptrs_device_view_type & getRowPtrsPackedDevice() const
Get the packed row pointers on device.
A parallel distribution of indices over processes.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph&#39;s column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
const row_ptrs_device_view_type & getRowPtrsUnpackedDevice() const
Get the unpacked row pointers on device.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row&#39;s local column indices.
typename Node::device_type device_type
This class&#39; Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph&#39;s column indices are stored as local indices.
A distributed dense vector.
Stand-alone utility functions and macros.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row&#39;s global column indices.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.