Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
45 
50 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
51 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52 #include "Tpetra_Details_makeColMap.hpp"
56 #include "Tpetra_Distributor.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "Tpetra_Vector.hpp"
59 #include "Tpetra_Import_Util.hpp"
60 #include "Tpetra_Import_Util2.hpp"
61 #include "Tpetra_Details_packCrsGraph.hpp"
62 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63 #include "Tpetra_Details_CrsPadding.hpp"
64 #include "Tpetra_Util.hpp"
65 #include <algorithm>
66 #include <limits>
67 #include <map>
68 #include <sstream>
69 #include <string>
70 #include <type_traits>
71 #include <utility>
72 #include <vector>
73 
74 namespace Tpetra {
75  namespace Details {
76  namespace Impl {
77 
78  template<class MapIter>
79  void
80  verbosePrintMap(std::ostream& out,
81  MapIter beg,
82  MapIter end,
83  const size_t numEnt,
84  const char mapName[])
85  {
86  using ::Tpetra::Details::Behavior;
88 
89  out << mapName << ": {";
90  const size_t maxNumToPrint =
92  if (maxNumToPrint == 0) {
93  if (numEnt != 0) {
94  out << "...";
95  }
96  }
97  else {
98  const size_t numToPrint = numEnt > maxNumToPrint ?
99  maxNumToPrint : numEnt;
100  size_t count = 0;
101  for (MapIter it = beg; it != end; ++it) {
102  out << "(" << (*it).first << ", ";
103  verbosePrintArray(out, (*it).second, "gblColInds",
104  maxNumToPrint);
105  out << ")";
106  if (count + size_t(1) < numToPrint) {
107  out << ", ";
108  }
109  ++count;
110  }
111  if (count < numEnt) {
112  out << ", ...";
113  }
114  }
115  out << "}";
116  }
117 
118  template<class LO, class GO, class Node>
119  Teuchos::ArrayView<GO>
120  getRowGraphGlobalRow(
121  std::vector<GO>& gblColIndsStorage,
122  const RowGraph<LO, GO, Node>& graph,
123  const GO gblRowInd)
124  {
125  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126  if (gblColIndsStorage.size() < origNumEnt) {
127  gblColIndsStorage.resize(origNumEnt);
128  }
129  typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130  origNumEnt);
131  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133  return retval;
134  }
135 
136  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137  class ConvertColumnIndicesFromGlobalToLocal {
138  public:
139  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140  const ::Kokkos::View<const GO*, DT>& gblColInds,
141  const ::Kokkos::View<const OffsetType*, DT>& ptr,
142  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144  lclColInds_ (lclColInds),
145  gblColInds_ (gblColInds),
146  ptr_ (ptr),
147  lclColMap_ (lclColMap),
148  numRowEnt_ (numRowEnt)
149  {}
150 
151  KOKKOS_FUNCTION void
152  operator () (const LO& lclRow, OffsetType& curNumBad) const
153  {
154  const OffsetType offset = ptr_(lclRow);
155  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156  // of entries in a row to LO, as long as the row doesn't have
157  // too many duplicate entries.
158  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159  for (LO j = 0; j < numEnt; ++j) {
160  const GO gid = gblColInds_(offset + j);
161  const LO lid = lclColMap_.getLocalElement (gid);
162  lclColInds_(offset + j) = lid;
163  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164  ++curNumBad;
165  }
166  }
167  }
168 
169  static OffsetType
170  run (const ::Kokkos::View<LO*, DT>& lclColInds,
171  const ::Kokkos::View<const GO*, DT>& gblColInds,
172  const ::Kokkos::View<const OffsetType*, DT>& ptr,
173  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175  {
176  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178 
179  const LO lclNumRows = ptr.extent (0) == 0 ?
180  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181  OffsetType numBad = 0;
182  // Count of "bad" column indices is a reduction over rows.
183  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184  functor_type (lclColInds, gblColInds, ptr,
185  lclColMap, numRowEnt),
186  numBad);
187  return numBad;
188  }
189 
190  private:
191  ::Kokkos::View<LO*, DT> lclColInds_;
192  ::Kokkos::View<const GO*, DT> gblColInds_;
193  ::Kokkos::View<const OffsetType*, DT> ptr_;
195  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196  };
197 
198  } // namespace Impl
199 
214  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215  OffsetType
216  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217  const Kokkos::View<const GO*, DT>& gblColInds,
218  const Kokkos::View<const OffsetType*, DT>& ptr,
219  const LocalMap<LO, GO, DT>& lclColMap,
220  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221  {
222  using Impl::ConvertColumnIndicesFromGlobalToLocal;
223  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225  }
226 
227  template<class ViewType, class LO>
228  class MaxDifference {
229  public:
230  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231 
232  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233  dst = 0;
234  }
235 
236  KOKKOS_INLINE_FUNCTION void
237  join (LO& dst, const LO& src) const
238  {
239  dst = (src > dst) ? src : dst;
240  }
241 
242  KOKKOS_INLINE_FUNCTION void
243  operator () (const LO lclRow, LO& maxNumEnt) const
244  {
245  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247  }
248  private:
249  typename ViewType::const_type ptr_;
250  };
251 
252  template<class ViewType, class LO>
253  typename ViewType::non_const_value_type
254  maxDifference (const char kernelLabel[],
255  const ViewType& ptr,
256  const LO lclNumRows)
257  {
258  if (lclNumRows == 0) {
259  // mfh 07 May 2018: Weirdly, I need this special case,
260  // otherwise I get the wrong answer.
261  return static_cast<LO> (0);
262  }
263  else {
264  using execution_space = typename ViewType::execution_space;
265  using range_type = Kokkos::RangePolicy<execution_space, LO>;
266  LO theMaxNumEnt {0};
267  Kokkos::parallel_reduce (kernelLabel,
268  range_type (0, lclNumRows),
269  MaxDifference<ViewType, LO> (ptr),
270  theMaxNumEnt);
271  return theMaxNumEnt;
272  }
273  }
274 
275  } // namespace Details
276 
277  template <class LocalOrdinal, class GlobalOrdinal, class Node>
278  bool
279  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
280  getDebug() {
281  return Details::Behavior::debug("CrsGraph");
282  }
283 
284  template <class LocalOrdinal, class GlobalOrdinal, class Node>
285  bool
286  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
287  getVerbose() {
288  return Details::Behavior::verbose("CrsGraph");
289  }
290 
291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
292  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
293  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294  const size_t maxNumEntriesPerRow,
295  const Teuchos::RCP<Teuchos::ParameterList>& params) :
296  dist_object_type (rowMap)
297  , rowMap_ (rowMap)
298  , numAllocForAllRows_ (maxNumEntriesPerRow)
299  {
300  const char tfecfFuncName[] =
301  "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
302  staticAssertions ();
303  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
304  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
305  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
306  "a valid size_t value, which in this case means it must not be "
307  "Teuchos::OrdinalTraits<size_t>::invalid().");
308  resumeFill (params);
310  }
311 
312  template <class LocalOrdinal, class GlobalOrdinal, class Node>
314  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
315  const Teuchos::RCP<const map_type>& colMap,
316  const size_t maxNumEntriesPerRow,
317  const Teuchos::RCP<Teuchos::ParameterList>& params) :
318  dist_object_type (rowMap)
319  , rowMap_ (rowMap)
320  , colMap_ (colMap)
321  , numAllocForAllRows_ (maxNumEntriesPerRow)
322  {
323  const char tfecfFuncName[] =
324  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
325  staticAssertions ();
326  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
327  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
328  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
329  "a valid size_t value, which in this case means it must not be "
330  "Teuchos::OrdinalTraits<size_t>::invalid().");
331  resumeFill (params);
333  }
334 
335 
336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
338  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
339  const Teuchos::ArrayView<const size_t>& numEntPerRow,
340  const Teuchos::RCP<Teuchos::ParameterList>& params) :
341  dist_object_type (rowMap)
342  , rowMap_ (rowMap)
343  , numAllocForAllRows_ (0)
344  {
345  const char tfecfFuncName[] =
346  "CrsGraph(rowMap,numEntPerRow,params): ";
347  staticAssertions ();
348 
349  const size_t lclNumRows = rowMap.is_null () ?
350  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
351  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
352  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
353  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
354  << " != the local number of rows " << lclNumRows << " as specified by "
355  "the input row Map.");
356 
357  if (debug_) {
358  for (size_t r = 0; r < lclNumRows; ++r) {
359  const size_t curRowCount = numEntPerRow[r];
360  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
361  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
362  std::invalid_argument, "numEntPerRow(" << r << ") "
363  "specifies an invalid number of entries "
364  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
365  }
366  }
367 
368  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
369  // The latter is a const View, so we have to copy into a nonconst
370  // View first, then assign.
371  typedef decltype (k_numAllocPerRow_) out_view_type;
372  typedef typename out_view_type::non_const_type nc_view_type;
373  typedef Kokkos::View<const size_t*,
374  typename nc_view_type::array_layout,
375  Kokkos::HostSpace,
376  Kokkos::MemoryUnmanaged> in_view_type;
377  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
378  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
379  lclNumRows);
380  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
381  using exec_space = typename nc_view_type::execution_space;
382  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
383  k_numAllocPerRow_ = numAllocPerRowOut;
384 
385  resumeFill (params);
387  }
388 
389 
390 
391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
393  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395  const Teuchos::RCP<Teuchos::ParameterList>& params) :
396  dist_object_type (rowMap)
397  , rowMap_ (rowMap)
398  , k_numAllocPerRow_ (numEntPerRow.h_view)
399  , numAllocForAllRows_ (0)
400  {
401  const char tfecfFuncName[] =
402  "CrsGraph(rowMap,numEntPerRow,params): ";
403  staticAssertions ();
404 
405  const size_t lclNumRows = rowMap.is_null () ?
406  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
407  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
408  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
409  std::invalid_argument, "numEntPerRow has length " <<
410  numEntPerRow.extent (0) << " != the local number of rows " <<
411  lclNumRows << " as specified by " "the input row Map.");
412 
413  if (debug_) {
414  for (size_t r = 0; r < lclNumRows; ++r) {
415  const size_t curRowCount = numEntPerRow.h_view(r);
416  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
417  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
418  std::invalid_argument, "numEntPerRow(" << r << ") "
419  "specifies an invalid number of entries "
420  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
421  }
422  }
423 
424  resumeFill (params);
426  }
427 
428 
429  template <class LocalOrdinal, class GlobalOrdinal, class Node>
431  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
432  const Teuchos::RCP<const map_type>& colMap,
433  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
434  const Teuchos::RCP<Teuchos::ParameterList>& params) :
435  dist_object_type (rowMap)
436  , rowMap_ (rowMap)
437  , colMap_ (colMap)
438  , k_numAllocPerRow_ (numEntPerRow.h_view)
439  , numAllocForAllRows_ (0)
440  {
441  const char tfecfFuncName[] =
442  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
443  staticAssertions ();
444 
445  const size_t lclNumRows = rowMap.is_null () ?
446  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
447  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
448  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
449  std::invalid_argument, "numEntPerRow has length " <<
450  numEntPerRow.extent (0) << " != the local number of rows " <<
451  lclNumRows << " as specified by " "the input row Map.");
452 
453  if (debug_) {
454  for (size_t r = 0; r < lclNumRows; ++r) {
455  const size_t curRowCount = numEntPerRow.h_view(r);
456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
458  std::invalid_argument, "numEntPerRow(" << r << ") "
459  "specifies an invalid number of entries "
460  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
461  }
462  }
463 
464  resumeFill (params);
466  }
467 
468 
469  template <class LocalOrdinal, class GlobalOrdinal, class Node>
471  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
472  const Teuchos::RCP<const map_type>& colMap,
473  const Teuchos::ArrayView<const size_t>& numEntPerRow,
474  const Teuchos::RCP<Teuchos::ParameterList>& params) :
475  dist_object_type (rowMap)
476  , rowMap_ (rowMap)
477  , colMap_ (colMap)
478  , numAllocForAllRows_ (0)
479  {
480  const char tfecfFuncName[] =
481  "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
482  staticAssertions ();
483 
484  const size_t lclNumRows = rowMap.is_null () ?
485  static_cast<size_t> (0) : rowMap->getLocalNumElements ();
486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
487  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
488  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
489  << " != the local number of rows " << lclNumRows << " as specified by "
490  "the input row Map.");
491 
492  if (debug_) {
493  for (size_t r = 0; r < lclNumRows; ++r) {
494  const size_t curRowCount = numEntPerRow[r];
495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
496  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
497  std::invalid_argument, "numEntPerRow(" << r << ") "
498  "specifies an invalid number of entries "
499  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
500  }
501  }
502 
503  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
504  // The latter is a const View, so we have to copy into a nonconst
505  // View first, then assign.
506  typedef decltype (k_numAllocPerRow_) out_view_type;
507  typedef typename out_view_type::non_const_type nc_view_type;
508  typedef Kokkos::View<const size_t*,
509  typename nc_view_type::array_layout,
510  Kokkos::HostSpace,
511  Kokkos::MemoryUnmanaged> in_view_type;
512  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
513  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
514  lclNumRows);
515  // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
516  using exec_space = typename nc_view_type::execution_space;
517  Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
518  k_numAllocPerRow_ = numAllocPerRowOut;
519 
520  resumeFill (params);
522  }
523 
524 
525  template <class LocalOrdinal, class GlobalOrdinal, class Node>
528  const Teuchos::RCP<const map_type>& rowMap,
529  const Teuchos::RCP<Teuchos::ParameterList>& params) :
530  dist_object_type (rowMap)
531  , rowMap_(rowMap)
532  , colMap_(originalGraph.colMap_)
533  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
534  , storageStatus_(originalGraph.storageStatus_)
535  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
536  , indicesAreLocal_(originalGraph.indicesAreLocal_)
537  , indicesAreSorted_(originalGraph.indicesAreSorted_)
538  {
539  staticAssertions();
540 
541  int numRows = rowMap->getLocalNumElements();
542  size_t numNonZeros = originalGraph.rowPtrsPacked_host_(numRows);
543  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
544 
545  rowPtrsUnpacked_dev_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_dev_, rowsToUse);
546  rowPtrsUnpacked_host_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_host_, rowsToUse);
547 
548  rowPtrsPacked_dev_ = Kokkos::subview(originalGraph.rowPtrsPacked_dev_, rowsToUse);
549  rowPtrsPacked_host_ = Kokkos::subview(originalGraph.rowPtrsPacked_host_, rowsToUse);
550 
551  if (indicesAreLocal_) {
552  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
553  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
554  }
555  else {
556  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
557  }
558 
560  }
561 
562  template <class LocalOrdinal, class GlobalOrdinal, class Node>
564  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
565  const Teuchos::RCP<const map_type>& colMap,
566  const typename local_graph_device_type::row_map_type& rowPointers,
567  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
568  const Teuchos::RCP<Teuchos::ParameterList>& params) :
569  dist_object_type (rowMap)
570  , rowMap_(rowMap)
571  , colMap_(colMap)
572  , numAllocForAllRows_(0)
573  , storageStatus_(Details::STORAGE_1D_PACKED)
574  , indicesAreAllocated_(true)
575  , indicesAreLocal_(true)
576  {
577  staticAssertions ();
578  if (! params.is_null() && params->isParameter("sorted") &&
579  ! params->get<bool>("sorted")) {
580  indicesAreSorted_ = false;
581  }
582  else {
583  indicesAreSorted_ = true;
584  }
585  setAllIndices (rowPointers, columnIndices);
587  }
588 
589  template <class LocalOrdinal, class GlobalOrdinal, class Node>
591  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
592  const Teuchos::RCP<const map_type>& colMap,
593  const Teuchos::ArrayRCP<size_t>& rowPointers,
594  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
595  const Teuchos::RCP<Teuchos::ParameterList>& params) :
596  dist_object_type (rowMap)
597  , rowMap_ (rowMap)
598  , colMap_ (colMap)
599  , numAllocForAllRows_ (0)
600  , storageStatus_ (Details::STORAGE_1D_PACKED)
601  , indicesAreAllocated_ (true)
602  , indicesAreLocal_ (true)
603  {
604  staticAssertions ();
605  if (! params.is_null() && params->isParameter("sorted") &&
606  ! params->get<bool>("sorted")) {
607  indicesAreSorted_ = false;
608  }
609  else {
610  indicesAreSorted_ = true;
611  }
612  setAllIndices (rowPointers, columnIndices);
614  }
615 
616  template <class LocalOrdinal, class GlobalOrdinal, class Node>
618  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
619  const Teuchos::RCP<const map_type>& colMap,
620  const local_graph_device_type& k_local_graph_,
621  const Teuchos::RCP<Teuchos::ParameterList>& params)
622  : CrsGraph (k_local_graph_,
623  rowMap,
624  colMap,
625  Teuchos::null,
626  Teuchos::null,
627  params)
628  {}
629 
630  template <class LocalOrdinal, class GlobalOrdinal, class Node>
632  CrsGraph (const local_graph_device_type& k_local_graph_,
633  const Teuchos::RCP<const map_type>& rowMap,
634  const Teuchos::RCP<const map_type>& colMap,
635  const Teuchos::RCP<const map_type>& domainMap,
636  const Teuchos::RCP<const map_type>& rangeMap,
637  const Teuchos::RCP<Teuchos::ParameterList>& params)
638  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
639  , rowMap_ (rowMap)
640  , colMap_ (colMap)
641  , numAllocForAllRows_ (0)
642  , storageStatus_ (Details::STORAGE_1D_PACKED)
643  , indicesAreAllocated_ (true)
644  , indicesAreLocal_ (true)
645  {
646  staticAssertions();
647  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
648 
649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
650  colMap.is_null (), std::runtime_error,
651  ": The input column Map must be nonnull.");
652  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
653  k_local_graph_.numRows () != rowMap->getLocalNumElements (),
654  std::runtime_error,
655  ": The input row Map and the input local graph need to have the same "
656  "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
657  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
658  << " row(s).");
659 
660  // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
661  // rowMap_->getLocalNumElements(), but it doesn't have to.
662  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663  // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
664  // ": The input row Map and the input local graph need to have the same "
665  // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
666  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
667  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668  lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
669  ": cannot have 1D data structures allocated.");
670 
671  if(! params.is_null() && params->isParameter("sorted") &&
672  ! params->get<bool>("sorted")) {
673  indicesAreSorted_ = false;
674  }
675  else {
676  indicesAreSorted_ = true;
677  }
678 
679  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
680  rangeMap .is_null() ? rowMap_ : rangeMap);
681  Teuchos::Array<int> remotePIDs (0); // unused output argument
682  this->makeImportExport (remotePIDs, false);
683 
684  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
686  this->setRowPtrs(k_local_graph_.row_map);
687 
688  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
689 
690  const bool callComputeGlobalConstants = params.get () == nullptr ||
691  params->get ("compute global constants", true);
692 
693  if (callComputeGlobalConstants) {
694  this->computeGlobalConstants ();
695  }
696  this->fillComplete_ = true;
697  this->checkInternalState ();
698  }
699 
700  template <class LocalOrdinal, class GlobalOrdinal, class Node>
703  const Teuchos::RCP<const map_type>& rowMap,
704  const Teuchos::RCP<const map_type>& colMap,
705  const Teuchos::RCP<const map_type>& domainMap,
706  const Teuchos::RCP<const map_type>& rangeMap,
707  const Teuchos::RCP<const import_type>& importer,
708  const Teuchos::RCP<const export_type>& exporter,
709  const Teuchos::RCP<Teuchos::ParameterList>& params) :
710  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
711  rowMap_ (rowMap),
712  colMap_ (colMap),
713  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
714  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
715  importer_ (importer),
716  exporter_ (exporter),
717  numAllocForAllRows_ (0),
718  storageStatus_ (Details::STORAGE_1D_PACKED),
719  indicesAreAllocated_ (true),
720  indicesAreLocal_ (true)
721  {
722  staticAssertions();
723  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
724  "Map,Map,Map,Map,Import,Export,params): ";
725 
726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
727  (colMap.is_null (), std::runtime_error,
728  "The input column Map must be nonnull.");
729 
730  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
732  setRowPtrs(lclGraph.row_map);
733 
734  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
735 
736  if (! params.is_null() && params->isParameter("sorted") &&
737  ! params->get<bool>("sorted")) {
738  indicesAreSorted_ = false;
739  }
740  else {
741  indicesAreSorted_ = true;
742  }
743 
744  const bool callComputeGlobalConstants =
745  params.get () == nullptr ||
746  params->get ("compute global constants", true);
747  if (callComputeGlobalConstants) {
748  this->computeGlobalConstants ();
749  }
750  fillComplete_ = true;
752  }
753 
754  template <class LocalOrdinal, class GlobalOrdinal, class Node>
755  Teuchos::RCP<const Teuchos::ParameterList>
758  {
759  using Teuchos::RCP;
760  using Teuchos::ParameterList;
761  using Teuchos::parameterList;
762 
763  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
764 
765  // Make a sublist for the Import.
766  RCP<ParameterList> importSublist = parameterList ("Import");
767 
768  // FIXME (mfh 02 Apr 2012) We should really have the Import and
769  // Export objects fill in these lists. However, we don't want to
770  // create an Import or Export unless we need them. For now, we
771  // know that the Import and Export just pass the list directly to
772  // their Distributor, so we can create a Distributor here
773  // (Distributor's constructor is a lightweight operation) and have
774  // it fill in the list.
775 
776  // Fill in Distributor default parameters by creating a
777  // Distributor and asking it to do the work.
778  Distributor distributor (rowMap_->getComm (), importSublist);
779  params->set ("Import", *importSublist, "How the Import performs communication.");
780 
781  // Make a sublist for the Export. For now, it's a clone of the
782  // Import sublist. It's not a shallow copy, though, since we
783  // might like the Import to do communication differently than the
784  // Export.
785  params->set ("Export", *importSublist, "How the Export performs communication.");
786 
787  return params;
788  }
789 
790  template <class LocalOrdinal, class GlobalOrdinal, class Node>
791  void
793  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
794  {
795  Teuchos::RCP<const Teuchos::ParameterList> validParams =
796  getValidParameters ();
797  params->validateParametersAndSetDefaults (*validParams);
798  this->setMyParamList (params);
799  }
800 
801  template <class LocalOrdinal, class GlobalOrdinal, class Node>
805  {
806  return rowMap_->getGlobalNumElements ();
807  }
808 
809  template <class LocalOrdinal, class GlobalOrdinal, class Node>
813  {
814  const char tfecfFuncName[] = "getGlobalNumCols: ";
815  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
816  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
817  "The graph does not have a domain Map. You may not call this method in "
818  "that case.");
819  return getDomainMap ()->getGlobalNumElements ();
820  }
821 
822 
823  template <class LocalOrdinal, class GlobalOrdinal, class Node>
824  size_t
827  {
828  return this->rowMap_.is_null () ?
829  static_cast<size_t> (0) :
830  this->rowMap_->getLocalNumElements ();
831  }
832 
833 
834  template <class LocalOrdinal, class GlobalOrdinal, class Node>
835  size_t
838  {
839  const char tfecfFuncName[] = "getLocalNumCols: ";
840  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841  ! hasColMap (), std::runtime_error,
842  "The graph does not have a column Map. You may not call this method "
843  "unless the graph has a column Map. This requires either that a custom "
844  "column Map was given to the constructor, or that fillComplete() has "
845  "been called.");
846  return colMap_.is_null () ? static_cast<size_t> (0) :
847  colMap_->getLocalNumElements ();
848  }
849 
850 
851 
852  template <class LocalOrdinal, class GlobalOrdinal, class Node>
853  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
855  getRowMap () const
856  {
857  return rowMap_;
858  }
859 
860  template <class LocalOrdinal, class GlobalOrdinal, class Node>
861  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
863  getColMap () const
864  {
865  return colMap_;
866  }
867 
868  template <class LocalOrdinal, class GlobalOrdinal, class Node>
869  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
872  {
873  return domainMap_;
874  }
875 
876  template <class LocalOrdinal, class GlobalOrdinal, class Node>
877  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
879  getRangeMap () const
880  {
881  return rangeMap_;
882  }
883 
884  template <class LocalOrdinal, class GlobalOrdinal, class Node>
885  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
887  getImporter () const
888  {
889  return importer_;
890  }
891 
892  template <class LocalOrdinal, class GlobalOrdinal, class Node>
893  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
895  getExporter () const
896  {
897  return exporter_;
898  }
899 
900  template <class LocalOrdinal, class GlobalOrdinal, class Node>
901  bool
903  hasColMap () const
904  {
905  return ! colMap_.is_null ();
906  }
907 
908  template <class LocalOrdinal, class GlobalOrdinal, class Node>
909  bool
912  {
913  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
914  // getLocalNumRows() is zero?
915 
916  const bool isOpt = indicesAreAllocated_ &&
917  k_numRowEntries_.extent (0) == 0 &&
918  getLocalNumRows () > 0;
919 
920  return isOpt;
921  }
922 
923 
924  template <class LocalOrdinal, class GlobalOrdinal, class Node>
928  {
929  const char tfecfFuncName[] = "getGlobalNumEntries: ";
930  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
931  (! this->haveGlobalConstants_, std::logic_error,
932  "The graph does not have global constants computed, "
933  "but the user has requested them.");
934 
935  return globalNumEntries_;
936  }
937 
938 
939  template <class LocalOrdinal, class GlobalOrdinal, class Node>
940  size_t
943  {
944  typedef LocalOrdinal LO;
945 
946  if (this->indicesAreAllocated_) {
947  const LO lclNumRows = this->getLocalNumRows ();
948  if (lclNumRows == 0) {
949  return static_cast<size_t> (0);
950  }
951  else {
952  // Avoid the "*this capture" issue by creating a local Kokkos::View.
953  auto numEntPerRow = this->k_numRowEntries_;
954  const LO numNumEntPerRow = numEntPerRow.extent (0);
955  if (numNumEntPerRow == 0) {
956  if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
957  static_cast<LO> (lclNumRows + 1)) {
958  return static_cast<size_t> (0);
959  }
960  else {
961  return this->rowPtrsPacked_host_(lclNumRows);
962  }
963  }
964  else { // k_numRowEntries_ is populated
965  // k_numRowEntries_ is actually be a host View, so we run
966  // the sum in its native execution space. This also means
967  // that we can use explicit capture (which could perhaps
968  // improve build time) instead of KOKKOS_LAMBDA, and avoid
969  // any CUDA build issues with trying to run a __device__ -
970  // only function on host.
971  typedef typename num_row_entries_type::execution_space
972  host_exec_space;
973  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
974 
975  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
976  lclNumRows :
977  numNumEntPerRow;
978  size_t nodeNumEnt = 0;
979  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
980  range_type (0, upperLoopBound),
981  [=] (const LO& k, size_t& lclSum) {
982  lclSum += numEntPerRow(k);
983  }, nodeNumEnt);
984  return nodeNumEnt;
985  }
986  }
987  }
988  else { // nothing allocated on this process, so no entries
989  return static_cast<size_t> (0);
990  }
991  }
992 
993  template <class LocalOrdinal, class GlobalOrdinal, class Node>
997  {
998  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
999  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1000  (! this->haveGlobalConstants_, std::logic_error,
1001  "The graph does not have global constants computed, "
1002  "but the user has requested them.");
1003 
1004  return globalMaxNumRowEntries_;
1005  }
1006 
1007  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1008  size_t
1011  {
1012  return nodeMaxNumRowEntries_;
1013  }
1014 
1015  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1016  bool
1019  {
1020  return fillComplete_;
1021  }
1022 
1023  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1024  bool
1027  {
1028  return ! fillComplete_;
1029  }
1030 
1031 
1032  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1033  bool
1036  {
1037  return indicesAreLocal_;
1038  }
1039 
1040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1041  bool
1044  {
1045  return indicesAreGlobal_;
1046  }
1047 
1048  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1049  size_t
1052  {
1053  typedef LocalOrdinal LO;
1054 
1055  if (this->indicesAreAllocated_) {
1056  const LO lclNumRows = this->getLocalNumRows ();
1057  if (lclNumRows == 0) {
1058  return static_cast<size_t> (0);
1059  }
1060  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1061  if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
1062  static_cast<LO> (lclNumRows + 1)) {
1063  return static_cast<size_t> (0);
1064  }
1065  else {
1066  return this->rowPtrsPacked_host_(lclNumRows);
1067  }
1068  }
1069  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1070  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1071  return static_cast<size_t> (0);
1072  }
1073  else {
1074  return rowPtrsUnpacked_host_(lclNumRows);
1075  }
1076  }
1077  else {
1078  return static_cast<size_t> (0);
1079  }
1080  }
1081  else {
1082  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1083  }
1084  }
1085 
1086  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1087  Teuchos::RCP<const Teuchos::Comm<int> >
1089  getComm () const
1090  {
1091  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1092  }
1093 
1094  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1095  GlobalOrdinal
1098  {
1099  return rowMap_->getIndexBase ();
1100  }
1101 
1102  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1103  bool
1105  indicesAreAllocated () const
1106  {
1107  return indicesAreAllocated_;
1108  }
1109 
1110  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1111  bool
1113  isSorted () const
1114  {
1115  return indicesAreSorted_;
1116  }
1117 
1118  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1119  bool
1121  isMerged () const
1122  {
1123  return noRedundancies_;
1124  }
1125 
1126  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1127  void
1130  {
1131  // FIXME (mfh 07 May 2013) How do we know that the change
1132  // introduced a redundancy, or even that it invalidated the sorted
1133  // order of indices? CrsGraph has always made this conservative
1134  // guess. It could be a bit costly to check at insertion time,
1135  // though.
1136  indicesAreSorted_ = false;
1137  noRedundancies_ = false;
1138 
1139  // We've modified the graph, so we'll have to recompute local
1140  // constants like the number of diagonal entries on this process.
1141  haveLocalConstants_ = false;
1142  }
1143 
1144  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1145  void
1147  allocateIndices (const ELocalGlobal lg, const bool verbose)
1148  {
1150  using Teuchos::arcp;
1151  using Teuchos::Array;
1152  using Teuchos::ArrayRCP;
1153  using std::endl;
1154  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1155  typedef typename local_graph_device_type::row_map_type::non_const_type
1156  non_const_row_map_type;
1157  const char tfecfFuncName[] = "allocateIndices: ";
1158  const char suffix[] =
1159  " Please report this bug to the Tpetra developers.";
1160  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1161 
1162  std::unique_ptr<std::string> prefix;
1163  if (verbose) {
1164  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1165  std::ostringstream os;
1166  os << *prefix << "Start: lg="
1167  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1168  << ", numRows: " << this->getLocalNumRows() << endl;
1169  std::cerr << os.str();
1170  }
1171 
1172  // This is a protected function, only callable by us. If it was
1173  // called incorrectly, it is our fault. That's why the tests
1174  // below throw std::logic_error instead of std::invalid_argument.
1175  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1176  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1177  ": The graph is locally indexed, but Tpetra code is calling "
1178  "this method with lg=GlobalIndices." << suffix);
1179  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1180  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1181  ": The graph is globally indexed, but Tpetra code is calling "
1182  "this method with lg=LocalIndices." << suffix);
1183  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1184  (indicesAreAllocated (), std::logic_error, ": The graph's "
1185  "indices are already allocated, but Tpetra is calling "
1186  "allocateIndices again." << suffix);
1187  const size_t numRows = this->getLocalNumRows ();
1188 
1189  //
1190  // STATIC ALLOCATION PROFILE
1191  //
1192  {
1193  if (verbose) {
1194  std::ostringstream os;
1195  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1196  std::cerr << os.str();
1197  }
1198  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1199 
1200  if (this->k_numAllocPerRow_.extent (0) != 0) {
1201  // It's OK to throw std::invalid_argument here, because we
1202  // haven't incurred any side effects yet. Throwing that
1203  // exception (and not, say, std::logic_error) implies that the
1204  // instance can recover.
1205  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1206  (this->k_numAllocPerRow_.extent (0) != numRows,
1207  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1208  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1209  << ", but its length != numRows = " << numRows << ".");
1210 
1211  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1212  // we want to compute here) lives on device. That's OK;
1213  // computeOffsetsFromCounts can handle this case.
1215 
1216  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1217  // doesn't attempt to check its input for "invalid" flag
1218  // values. For now, we omit that feature of the sequential
1219  // code disabled below.
1220  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1221  }
1222  else {
1223  // It's OK to throw std::invalid_argument here, because we
1224  // haven't incurred any side effects yet. Throwing that
1225  // exception (and not, say, std::logic_error) implies that the
1226  // instance can recover.
1227  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1228  (this->numAllocForAllRows_ ==
1229  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1230  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1231  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1232  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1233 
1235  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1236  }
1237 
1238  // "Commit" the resulting row offsets.
1239  setRowPtrsUnpacked(k_rowPtrs);
1240  }
1241 
1242  const size_type numInds = rowPtrsUnpacked_host_(numRows);
1243  if (lg == LocalIndices) {
1244  if (verbose) {
1245  std::ostringstream os;
1246  os << *prefix << "Allocate local column indices "
1247  "lclIndsUnpacked_wdv: " << numInds << endl;
1248  std::cerr << os.str();
1249  }
1250  lclIndsUnpacked_wdv = local_inds_wdv_type (
1251  local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1252  }
1253  else {
1254  if (verbose) {
1255  std::ostringstream os;
1256  os << *prefix << "Allocate global column indices "
1257  "gblInds_wdv: " << numInds << endl;
1258  std::cerr << os.str();
1259  }
1260  gblInds_wdv = global_inds_wdv_type (
1261  global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1262  }
1263  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1264 
1265  this->indicesAreLocal_ = (lg == LocalIndices);
1266  this->indicesAreGlobal_ = (lg == GlobalIndices);
1267 
1268  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1269  using Kokkos::ViewAllocateWithoutInitializing;
1270  typedef decltype (k_numRowEntries_) row_ent_type;
1271  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1272  if (verbose) {
1273  std::ostringstream os;
1274  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1275  << endl;
1276  std::cerr << os.str();
1277  }
1278  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1279  // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1280  Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1281  Kokkos::fence(); // TODO: Need to understand downstream failure points and move this fence.
1282  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1283  }
1284 
1285  // Once indices are allocated, CrsGraph needs to free this information.
1286  this->numAllocForAllRows_ = 0;
1287  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1288  this->indicesAreAllocated_ = true;
1289 
1290  try {
1291  this->checkInternalState ();
1292  }
1293  catch (std::logic_error& e) {
1294  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1295  (true, std::logic_error, "At end of allocateIndices, "
1296  "checkInternalState threw std::logic_error: "
1297  << e.what ());
1298  }
1299  catch (std::exception& e) {
1300  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1301  (true, std::runtime_error, "At end of allocateIndices, "
1302  "checkInternalState threw std::exception: "
1303  << e.what ());
1304  }
1305  catch (...) {
1306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1307  (true, std::runtime_error, "At end of allocateIndices, "
1308  "checkInternalState threw an exception "
1309  "not a subclass of std::exception.");
1310  }
1311 
1312  if (verbose) {
1313  std::ostringstream os;
1314  os << *prefix << "Done" << endl;
1315  std::cerr << os.str();
1316  }
1317  }
1318 
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1321  local_inds_dualv_type::t_host::const_type
1323  getLocalIndsViewHost (const RowInfo& rowinfo) const
1324  {
1325  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1326  return typename local_inds_dualv_type::t_host::const_type ();
1327  else
1328  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1329  rowinfo.allocSize,
1330  Access::ReadOnly);
1331  }
1332 
1333  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1335  local_inds_dualv_type::t_host
1338  {
1339  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1340  return typename local_inds_dualv_type::t_host ();
1341  else
1342  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1343  rowinfo.allocSize,
1344  Access::ReadWrite);
1345  }
1346 
1347  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1349  global_inds_dualv_type::t_host::const_type
1351  getGlobalIndsViewHost (const RowInfo& rowinfo) const
1352  {
1353  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1354  return typename global_inds_dualv_type::t_host::const_type ();
1355  else
1356  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1357  rowinfo.allocSize,
1358  Access::ReadOnly);
1359  }
1360 
1361  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1363  local_inds_dualv_type::t_dev::const_type
1365  getLocalIndsViewDevice (const RowInfo& rowinfo) const
1366  {
1367  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1368  return typename local_inds_dualv_type::t_dev::const_type ();
1369  else
1370  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1371  rowinfo.allocSize,
1372  Access::ReadOnly);
1373  }
1374 
1375  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1377  global_inds_dualv_type::t_dev::const_type
1379  getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1380  {
1381  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1382  return typename global_inds_dualv_type::t_dev::const_type ();
1383  else
1384  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1385  rowinfo.allocSize,
1386  Access::ReadOnly);
1387  }
1388 
1389 
1390  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1391  RowInfo
1393  getRowInfo (const LocalOrdinal myRow) const
1394  {
1395  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1396  RowInfo ret;
1397  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1398  ret.localRow = STINV;
1399  ret.allocSize = 0;
1400  ret.numEntries = 0;
1401  ret.offset1D = STINV;
1402  return ret;
1403  }
1404 
1405  ret.localRow = static_cast<size_t> (myRow);
1406  if (this->indicesAreAllocated ()) {
1407  // Offsets tell us the allocation size in this case.
1408  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1409  ret.offset1D = 0;
1410  ret.allocSize = 0;
1411  }
1412  else {
1413  ret.offset1D = rowPtrsUnpacked_host_(myRow);
1414  ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1415  }
1416 
1417  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1418  ret.allocSize :
1419  this->k_numRowEntries_(myRow);
1420  }
1421  else { // haven't performed allocation yet; probably won't hit this code
1422  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1423  // allocate, rather than doing lazy allocation at first insert.
1424  // This will make k_numAllocPerRow_ obsolete.
1425  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1426  this->k_numAllocPerRow_(myRow) : // this is a host View
1427  this->numAllocForAllRows_;
1428  ret.numEntries = 0;
1429  ret.offset1D = STINV;
1430  }
1431 
1432  return ret;
1433  }
1434 
1435 
1436  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1437  RowInfo
1439  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1440  {
1441  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1442  RowInfo ret;
1443  if (this->rowMap_.is_null ()) {
1444  ret.localRow = STINV;
1445  ret.allocSize = 0;
1446  ret.numEntries = 0;
1447  ret.offset1D = STINV;
1448  return ret;
1449  }
1450  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1451  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1452  ret.localRow = STINV;
1453  ret.allocSize = 0;
1454  ret.numEntries = 0;
1455  ret.offset1D = STINV;
1456  return ret;
1457  }
1458 
1459  ret.localRow = static_cast<size_t> (myRow);
1460  if (this->indicesAreAllocated ()) {
1461  // graph data structures have the info that we need
1462  //
1463  // if static graph, offsets tell us the allocation size
1464  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1465  ret.offset1D = 0;
1466  ret.allocSize = 0;
1467  }
1468  else {
1469  ret.offset1D = rowPtrsUnpacked_host_(myRow);
1470  ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1471  }
1472 
1473  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1474  ret.allocSize :
1475  this->k_numRowEntries_(myRow);
1476  }
1477  else { // haven't performed allocation yet; probably won't hit this code
1478  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1479  // allocate, rather than doing lazy allocation at first insert.
1480  // This will make k_numAllocPerRow_ obsolete.
1481  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1482  this->k_numAllocPerRow_(myRow) : // this is a host View
1483  this->numAllocForAllRows_;
1484  ret.numEntries = 0;
1485  ret.offset1D = STINV;
1486  }
1487 
1488  return ret;
1489  }
1490 
1491 
1492  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1493  void
1495  staticAssertions () const
1496  {
1497  using Teuchos::OrdinalTraits;
1498  typedef LocalOrdinal LO;
1499  typedef GlobalOrdinal GO;
1500  typedef global_size_t GST;
1501 
1502  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1503  // This is so that we can store local indices in the memory
1504  // formerly occupied by global indices.
1505  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1506  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1507  // Assumption: max(size_t) >= max(LocalOrdinal)
1508  // This is so that we can represent any LocalOrdinal as a size_t.
1509  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1510  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1511  static_assert (sizeof(GST) >= sizeof(size_t),
1512  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1513 
1514  // FIXME (mfh 30 Sep 2015) We're not using
1515  // Teuchos::CompileTimeAssert any more. Can we do these checks
1516  // with static_assert?
1517 
1518  // can't call max() with CompileTimeAssert, because it isn't a
1519  // constant expression; will need to make this a runtime check
1520  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1521  "given template arguments: size assumptions are not valid.";
1522  TEUCHOS_TEST_FOR_EXCEPTION(
1523  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1524  std::runtime_error, msg);
1525  TEUCHOS_TEST_FOR_EXCEPTION(
1526  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1527  std::runtime_error, msg);
1528  TEUCHOS_TEST_FOR_EXCEPTION(
1529  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1530  std::runtime_error, msg);
1531  TEUCHOS_TEST_FOR_EXCEPTION(
1532  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1533  std::runtime_error, msg);
1534  }
1535 
1536 
1537  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1538  size_t
1541  const SLocalGlobalViews &newInds,
1542  const ELocalGlobal lg,
1543  const ELocalGlobal I)
1544  {
1545  using Teuchos::ArrayView;
1546  typedef LocalOrdinal LO;
1547  typedef GlobalOrdinal GO;
1548  const char tfecfFuncName[] = "insertIndices: ";
1549 
1550  size_t oldNumEnt = 0;
1551  if (debug_) {
1552  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1553  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1554  "lg must be either GlobalIndices or LocalIndices.");
1555  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1556  }
1557 
1558  size_t numNewInds = 0;
1559  if (lg == GlobalIndices) { // input indices are global
1560  ArrayView<const GO> new_ginds = newInds.ginds;
1561  numNewInds = new_ginds.size();
1562  if (I == GlobalIndices) { // store global indices
1563  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1564  if (debug_) {
1565  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1566  (static_cast<size_t> (gind_view.size ()) <
1567  rowinfo.numEntries + numNewInds, std::logic_error,
1568  "gind_view.size() = " << gind_view.size ()
1569  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1570  << ") + numNewInds (= " << numNewInds << ").");
1571  }
1572  GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1573  + rowinfo.numEntries;
1574  for (size_t k = 0; k < numNewInds; ++k) {
1575  gblColInds_out[k] = new_ginds[k];
1576  }
1577  }
1578  else if (I == LocalIndices) { // store local indices
1579  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1580  if (debug_) {
1581  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1582  (static_cast<size_t> (lind_view.size ()) <
1583  rowinfo.numEntries + numNewInds, std::logic_error,
1584  "lind_view.size() = " << lind_view.size ()
1585  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1586  << ") + numNewInds (= " << numNewInds << ").");
1587  }
1588  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1589  + rowinfo.numEntries;
1590  for (size_t k = 0; k < numNewInds; ++k) {
1591  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1592  }
1593  }
1594  }
1595  else if (lg == LocalIndices) { // input indices are local
1596  ArrayView<const LO> new_linds = newInds.linds;
1597  numNewInds = new_linds.size();
1598  if (I == LocalIndices) { // store local indices
1599  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1600  if (debug_) {
1601  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1602  (static_cast<size_t> (lind_view.size ()) <
1603  rowinfo.numEntries + numNewInds, std::logic_error,
1604  "lind_view.size() = " << lind_view.size ()
1605  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1606  << ") + numNewInds (= " << numNewInds << ").");
1607  }
1608  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1609  + rowinfo.numEntries;
1610  for (size_t k = 0; k < numNewInds; ++k) {
1611  lclColInds_out[k] = new_linds[k];
1612  }
1613  }
1614  else if (I == GlobalIndices) {
1615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1616  (true, std::logic_error, "The case where the input indices are local "
1617  "and the indices to write are global (lg=LocalIndices, I="
1618  "GlobalIndices) is not implemented, because it does not make sense."
1619  << std::endl << "If you have correct local column indices, that "
1620  "means the graph has a column Map. In that case, you should be "
1621  "storing local indices.");
1622  }
1623  }
1624 
1625  rowinfo.numEntries += numNewInds;
1626  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1627  this->setLocallyModified ();
1628 
1629  if (debug_) {
1630  const size_t chkNewNumEnt =
1631  this->getNumEntriesInLocalRow (rowinfo.localRow);
1632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1633  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1634  "chkNewNumEnt = " << chkNewNumEnt
1635  << " != oldNumEnt (= " << oldNumEnt
1636  << ") + numNewInds (= " << numNewInds << ").");
1637  }
1638 
1639  return numNewInds;
1640  }
1641 
1642  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1643  size_t
1645  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1646  const GlobalOrdinal inputGblColInds[],
1647  const size_t numInputInds)
1648  {
1649  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1650  inputGblColInds, numInputInds);
1651  }
1652 
1653  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1654  size_t
1657  const GlobalOrdinal inputGblColInds[],
1658  const size_t numInputInds,
1659  std::function<void(const size_t, const size_t, const size_t)> fun)
1660  {
1662  using Kokkos::View;
1663  using Kokkos::subview;
1664  using Kokkos::MemoryUnmanaged;
1665  using Teuchos::ArrayView;
1666  using LO = LocalOrdinal;
1667  using GO = GlobalOrdinal;
1668  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1669  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1670 
1671  auto numEntries = rowInfo.numEntries;
1672  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1673  inp_view_type inputInds(inputGblColInds, numInputInds);
1674  size_t numInserted;
1675  {
1676  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1677  numInserted = Details::insertCrsIndices(lclRow, this->rowPtrsUnpacked_host_,
1678  gblIndsHostView,
1679  numEntries, inputInds, fun);
1680  }
1681 
1682  const bool insertFailed =
1683  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1684  if(insertFailed) {
1685  constexpr size_t ONE (1);
1686  const int myRank = this->getComm()->getRank();
1687  std::ostringstream os;
1688 
1689  os << "Proc " << myRank << ": Not enough capacity to insert "
1690  << numInputInds
1691  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1692  << " into local row " << lclRow << ", which currently has "
1693  << rowInfo.numEntries
1694  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1695  << " and total allocation size " << rowInfo.allocSize
1696  << ". ";
1697  const size_t maxNumToPrint =
1699  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1700  numInputInds);
1701  verbosePrintArray(os, inputGblColIndsView, "Input global "
1702  "column indices", maxNumToPrint);
1703  os << ", ";
1704  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1705  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1706  rowInfo.numEntries);
1707  verbosePrintArray(os, curGblColIndsView, "Current global "
1708  "column indices", maxNumToPrint);
1709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1710  (true, std::runtime_error, os.str());
1711  }
1712 
1713  this->k_numRowEntries_(lclRow) += numInserted;
1714 
1715  this->setLocallyModified();
1716  return numInserted;
1717  }
1718 
1719 
1720  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1721  void
1723  insertLocalIndicesImpl (const LocalOrdinal myRow,
1724  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1725  std::function<void(const size_t, const size_t, const size_t)> fun)
1726  {
1727  using Kokkos::MemoryUnmanaged;
1728  using Kokkos::subview;
1729  using Kokkos::View;
1730  using LO = LocalOrdinal;
1731  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1732 
1733  const RowInfo rowInfo = this->getRowInfo(myRow);
1734 
1735  size_t numNewInds = 0;
1736  size_t newNumEntries = 0;
1737 
1738  auto numEntries = rowInfo.numEntries;
1739  // Note: Teuchos::ArrayViews are in HostSpace
1740  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1741  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1742  size_t numInserted = 0;
1743  {
1744  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1745  numInserted = Details::insertCrsIndices(myRow, rowPtrsUnpacked_host_, lclInds,
1746  numEntries, inputInds, fun);
1747  }
1748 
1749  const bool insertFailed =
1750  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1751  if(insertFailed) {
1752  constexpr size_t ONE (1);
1753  const size_t numInputInds(indices.size());
1754  const int myRank = this->getComm()->getRank();
1755  std::ostringstream os;
1756  os << "On MPI Process " << myRank << ": Not enough capacity to "
1757  "insert " << numInputInds
1758  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1759  << " into local row " << myRow << ", which currently has "
1760  << rowInfo.numEntries
1761  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1762  << " and total allocation size " << rowInfo.allocSize << ".";
1763  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1764  (true, std::runtime_error, os.str());
1765  }
1766  numNewInds = numInserted;
1767  newNumEntries = rowInfo.numEntries + numNewInds;
1768 
1769  this->k_numRowEntries_(myRow) += numNewInds;
1770  this->setLocallyModified ();
1771 
1772  if (debug_) {
1773  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1775  (chkNewNumEntries != newNumEntries, std::logic_error,
1776  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1777  << " != newNumEntries = " << newNumEntries
1778  << ". Please report this bug to the Tpetra developers.");
1779  }
1780  }
1781 
1782  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1783  size_t
1786  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1787  std::function<void(const size_t, const size_t, const size_t)> fun) const
1788  {
1789  using GO = GlobalOrdinal;
1790  using Kokkos::View;
1791  using Kokkos::MemoryUnmanaged;
1792  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1793 
1794  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1795  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1796 
1797  size_t numFound = 0;
1798  LocalOrdinal lclRow = rowInfo.localRow;
1799  if (this->isLocallyIndexed())
1800  {
1801  if (this->colMap_.is_null())
1802  return invalidCount;
1803  const auto& colMap = *(this->colMap_);
1804  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1805  numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1806  rowInfo.numEntries,
1807  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1808  }
1809  else if (this->isGloballyIndexed())
1810  {
1811  numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1812  rowInfo.numEntries,
1813  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1814  }
1815  return numFound;
1816  }
1817 
1818 
1819  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1820  size_t
1823  const bool sorted,
1824  const bool merged)
1825  {
1826  const size_t origNumEnt = rowInfo.numEntries;
1827  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1828  origNumEnt != 0) {
1829  auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1830 
1831  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1832  if (! sorted) {
1833  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1834  }
1835 
1836  if (! merged) {
1837  LocalOrdinal* const beg = lclColIndsRaw;
1838  LocalOrdinal* const end = beg + rowInfo.numEntries;
1839  LocalOrdinal* const newend = std::unique (beg, end);
1840  const size_t newNumEnt = newend - beg;
1841 
1842  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1843  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1844  return origNumEnt - newNumEnt; // the number of duplicates in the row
1845  }
1846  else {
1847  return static_cast<size_t> (0); // assume no duplicates
1848  }
1849  }
1850  else {
1851  return static_cast<size_t> (0); // no entries in the row
1852  }
1853  }
1854 
1855 
1856  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1857  void
1859  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1860  const Teuchos::RCP<const map_type>& rangeMap)
1861  {
1862  // simple pointer comparison for equality
1863  if (domainMap_ != domainMap) {
1864  domainMap_ = domainMap;
1865  importer_ = Teuchos::null;
1866  }
1867  if (rangeMap_ != rangeMap) {
1868  rangeMap_ = rangeMap;
1869  exporter_ = Teuchos::null;
1870  }
1871  }
1872 
1873 
1874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1875  void
1878  {
1879  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1880 
1881  globalNumEntries_ = INV;
1882  globalMaxNumRowEntries_ = INV;
1883  haveGlobalConstants_ = false;
1884  }
1885 
1886 
1887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1888  void
1891  {
1892  if (debug_) {
1893  using std::endl;
1894  const char tfecfFuncName[] = "checkInternalState: ";
1895  const char suffix[] = " Please report this bug to the Tpetra developers.";
1896 
1897  std::unique_ptr<std::string> prefix;
1898  if (verbose_) {
1899  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1900  std::ostringstream os;
1901  os << *prefix << "Start" << endl;
1902  std::cerr << os.str();
1903  }
1904 
1905  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1906  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1907  // check the internal state of this data structure
1908  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1909  // always remains in a valid state
1910 
1911  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1912  (this->rowMap_.is_null (), std::logic_error,
1913  "Row Map is null." << suffix);
1914  // This may access the row Map, so we need to check first (above)
1915  // whether the row Map is null.
1916  const LocalOrdinal lclNumRows =
1917  static_cast<LocalOrdinal> (this->getLocalNumRows ());
1918 
1919  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1920  (this->isFillActive () == this->isFillComplete (), std::logic_error,
1921  "Graph cannot be both fill active and fill complete." << suffix);
1922  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1923  (this->isFillComplete () &&
1924  (this->colMap_.is_null () ||
1925  this->rangeMap_.is_null () ||
1926  this->domainMap_.is_null ()),
1927  std::logic_error,
1928  "Graph is full complete, but at least one of {column, range, domain} "
1929  "Map is null." << suffix);
1930  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1931  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1932  std::logic_error, "Storage is optimized, but indices are not "
1933  "allocated, not even trivially." << suffix);
1934 
1935  size_t nodeAllocSize = 0;
1936  try {
1937  nodeAllocSize = this->getLocalAllocationSize ();
1938  }
1939  catch (std::logic_error& e) {
1940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1941  (true, std::runtime_error, "getLocalAllocationSize threw "
1942  "std::logic_error: " << e.what ());
1943  }
1944  catch (std::exception& e) {
1945  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1946  (true, std::runtime_error, "getLocalAllocationSize threw an "
1947  "std::exception: " << e.what ());
1948  }
1949  catch (...) {
1950  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1951  (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1952  "not a subclass of std::exception.");
1953  }
1954 
1955  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1956  (this->isStorageOptimized () &&
1957  nodeAllocSize != this->getLocalNumEntries (),
1958  std::logic_error, "Storage is optimized, but "
1959  "this->getLocalAllocationSize() = " << nodeAllocSize
1960  << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
1961  << "." << suffix);
1962  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1963  (! this->haveGlobalConstants_ &&
1964  (this->globalNumEntries_ != GSTI ||
1965  this->globalMaxNumRowEntries_ != GSTI),
1966  std::logic_error, "Graph claims not to have global constants, but "
1967  "some of the global constants are not marked as invalid." << suffix);
1968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969  (this->haveGlobalConstants_ &&
1970  (this->globalNumEntries_ == GSTI ||
1971  this->globalMaxNumRowEntries_ == GSTI),
1972  std::logic_error, "Graph claims to have global constants, but "
1973  "some of them are marked as invalid." << suffix);
1974  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1975  (this->haveGlobalConstants_ &&
1976  (this->globalNumEntries_ < this->getLocalNumEntries () ||
1977  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1978  std::logic_error, "Graph claims to have global constants, and "
1979  "all of the values of the global constants are valid, but "
1980  "some of the local constants are greater than "
1981  "their corresponding global constants." << suffix);
1982  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1983  (this->indicesAreAllocated () &&
1984  (this->numAllocForAllRows_ != 0 ||
1985  this->k_numAllocPerRow_.extent (0) != 0),
1986  std::logic_error, "The graph claims that its indices are allocated, but "
1987  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
1988  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1989  "the graph is supposed to release its \"allocation specifications\" "
1990  "when it allocates its indices." << suffix);
1991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992  (rowPtrsUnpacked_host_.extent(0) != rowPtrsUnpacked_dev_.extent(0),
1993  std::logic_error, "The host and device views of k_rowPtrs_ have "
1994  "different sizes; rowPtrsUnpacked_host_ has size "
1995  << rowPtrsUnpacked_host_.extent(0)
1996  << ", but rowPtrsUnpacked_dev_ has size "
1997  << rowPtrsUnpacked_dev_.extent(0)
1998  << "." << suffix);
1999  if (isGloballyIndexed() && rowPtrsUnpacked_host_.extent(0) != 0) {
2000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2001  (size_t(rowPtrsUnpacked_host_.extent(0)) != size_t(lclNumRows + 1),
2002  std::logic_error, "The graph is globally indexed and "
2003  "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2004  << ", but that size does not equal lclNumRows+1 = "
2005  << (lclNumRows+1) << "." << suffix);
2006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2007  (rowPtrsUnpacked_host_(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2008  std::logic_error, "The graph is globally indexed and "
2009  "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2010  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2011  << rowPtrsUnpacked_host_(lclNumRows)
2012  << " != gblInds_wdv.extent(0)="
2013  << gblInds_wdv.extent(0) << "." << suffix);
2014  }
2015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2016  (this->isLocallyIndexed () &&
2017  this->rowPtrsUnpacked_host_.extent (0) != 0 &&
2018  (static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) !=
2019  static_cast<size_t> (lclNumRows + 1) ||
2020  this->rowPtrsUnpacked_host_(lclNumRows) !=
2021  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2022  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2023  "the graph is locally indexed, then "
2024  "k_rowPtrs_ must have N+1 rows, and "
2025  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2026 
2027  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2028  (this->indicesAreAllocated () &&
2029  nodeAllocSize > 0 &&
2030  this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2031  this->gblInds_wdv.extent (0) == 0,
2032  std::logic_error, "Graph is allocated nontrivially, but "
2033  "but 1-D allocations are not present." << suffix);
2034 
2035  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2036  (! this->indicesAreAllocated () &&
2037  ((this->rowPtrsUnpacked_host_.extent (0) != 0 ||
2038  this->k_numRowEntries_.extent (0) != 0) ||
2039  this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2040  this->gblInds_wdv.extent (0) != 0),
2041  std::logic_error, "If indices are not allocated, "
2042  "then none of the buffers should be." << suffix);
2043  // indices may be local or global only if they are allocated
2044  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2045  // indicesAreGlobal_)
2046  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2047  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2048  ! this->indicesAreAllocated_,
2049  std::logic_error, "Indices may be local or global only if they are "
2050  "allocated." << suffix);
2051  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2052  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2053  std::logic_error, "Indices may not be both local and global." << suffix);
2054  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055  (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2056  std::logic_error, "Indices are local, but "
2057  "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2058  << ") != 0. In other words, if indices are local, then "
2059  "allocations of global indices should not be present."
2060  << suffix);
2061  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2062  (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2063  std::logic_error, "Indices are global, but "
2064  "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2065  << ") != 0. In other words, if indices are global, "
2066  "then allocations for local indices should not be present."
2067  << suffix);
2068  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2069  (indicesAreLocal_ && nodeAllocSize > 0 &&
2070  lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2071  std::logic_error, "Indices are local and "
2072  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2073  "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2074  << getLocalNumRows () << " > 0." << suffix);
2075  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2077  gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2078  std::logic_error, "Indices are global and "
2079  "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2080  "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2081  << getLocalNumRows () << " > 0." << suffix);
2082  // check the actual allocations
2083  if (this->indicesAreAllocated () &&
2084  this->rowPtrsUnpacked_host_.extent (0) != 0) {
2085  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2086  (static_cast<size_t> (this->rowPtrsUnpacked_host_.extent (0)) !=
2087  this->getLocalNumRows () + 1,
2088  std::logic_error, "Indices are allocated and "
2089  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2090  << this->rowPtrsUnpacked_host_.extent (0) << " != getLocalNumRows()+1 = "
2091  << (this->getLocalNumRows () + 1) << "." << suffix);
2092  const size_t actualNumAllocated =
2093  this->rowPtrsUnpacked_host_(this->getLocalNumRows());
2094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095  (this->isLocallyIndexed () &&
2096  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2097  std::logic_error, "Graph is locally indexed, indices are "
2098  "are allocated, and k_rowPtrs_ has nonzero length, but "
2099  "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2100  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2101  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2102  (this->isGloballyIndexed () &&
2103  static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2104  std::logic_error, "Graph is globally indexed, indices "
2105  "are allocated, and k_rowPtrs_ has nonzero length, but "
2106  "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2107  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2108  }
2109 
2110  if (verbose_) {
2111  std::ostringstream os;
2112  os << *prefix << "Done" << endl;
2113  std::cerr << os.str();
2114  }
2115  }
2116  }
2117 
2118 
2119  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2120  size_t
2122  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2123  {
2124  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2125  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2126  return Teuchos::OrdinalTraits<size_t>::invalid ();
2127  }
2128  else {
2129  return rowInfo.numEntries;
2130  }
2131  }
2132 
2133 
2134  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2135  size_t
2137  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2138  {
2139  const RowInfo rowInfo = this->getRowInfo (localRow);
2140  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2141  return Teuchos::OrdinalTraits<size_t>::invalid ();
2142  }
2143  else {
2144  return rowInfo.numEntries;
2145  }
2146  }
2147 
2148 
2149  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2150  size_t
2152  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2153  {
2154  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2155  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2156  return Teuchos::OrdinalTraits<size_t>::invalid ();
2157  }
2158  else {
2159  return rowInfo.allocSize;
2160  }
2161  }
2162 
2163 
2164  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2165  size_t
2167  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2168  {
2169  const RowInfo rowInfo = this->getRowInfo (localRow);
2170  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2171  return Teuchos::OrdinalTraits<size_t>::invalid ();
2172  }
2173  else {
2174  return rowInfo.allocSize;
2175  }
2176  }
2177 
2178 
2179  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2180  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2183  {
2184  return rowPtrsPacked_host_;
2185  }
2186 
2187  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2188  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2191  {
2192  return rowPtrsPacked_dev_;
2193  }
2194 
2195 
2196  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2197  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2200  {
2201  return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2202  }
2203 
2204  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2208  {
2209  return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2210  }
2211 
2212  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2213  void
2215  getLocalRowCopy (LocalOrdinal localRow,
2216  nonconst_local_inds_host_view_type & indices,
2217  size_t& numEntries) const
2218  {
2219  using Teuchos::ArrayView;
2220  const char tfecfFuncName[] = "getLocalRowCopy: ";
2221 
2222  TEUCHOS_TEST_FOR_EXCEPTION(
2223  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2224  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2225  "does not have a column Map yet. That means we don't have local indices "
2226  "for columns yet, so it doesn't make sense to call this method. If the "
2227  "graph doesn't have a column Map yet, you should call fillComplete on "
2228  "it first.");
2229 
2230  // This does the right thing (reports an empty row) if the input
2231  // row is invalid.
2232  const RowInfo rowinfo = this->getRowInfo (localRow);
2233  // No side effects on error.
2234  const size_t theNumEntries = rowinfo.numEntries;
2235  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2236  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2237  "Specified storage (size==" << indices.size () << ") does not suffice "
2238  "to hold all " << theNumEntries << " entry/ies for this row.");
2239  numEntries = theNumEntries;
2240 
2241  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2242  if (isLocallyIndexed ()) {
2243  auto lclInds = getLocalIndsViewHost(rowinfo);
2244  for (size_t j = 0; j < theNumEntries; ++j) {
2245  indices[j] = lclInds(j);
2246  }
2247  }
2248  else if (isGloballyIndexed ()) {
2249  auto gblInds = getGlobalIndsViewHost(rowinfo);
2250  for (size_t j = 0; j < theNumEntries; ++j) {
2251  indices[j] = colMap_->getLocalElement (gblInds(j));
2252  }
2253  }
2254  }
2255  }
2256 
2257 
2258  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2259  void
2261  getGlobalRowCopy (GlobalOrdinal globalRow,
2262  nonconst_global_inds_host_view_type &indices,
2263  size_t& numEntries) const
2264  {
2265  using Teuchos::ArrayView;
2266  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2267 
2268  // This does the right thing (reports an empty row) if the input
2269  // row is invalid.
2270  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2271  const size_t theNumEntries = rowinfo.numEntries;
2272  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2273  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2274  "Specified storage (size==" << indices.size () << ") does not suffice "
2275  "to hold all " << theNumEntries << " entry/ies for this row.");
2276  numEntries = theNumEntries; // first side effect
2277 
2278  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2279  if (isLocallyIndexed ()) {
2280  auto lclInds = getLocalIndsViewHost(rowinfo);
2281  for (size_t j = 0; j < theNumEntries; ++j) {
2282  indices[j] = colMap_->getGlobalElement (lclInds(j));
2283  }
2284  }
2285  else if (isGloballyIndexed ()) {
2286  auto gblInds = getGlobalIndsViewHost(rowinfo);
2287  for (size_t j = 0; j < theNumEntries; ++j) {
2288  indices[j] = gblInds(j);
2289  }
2290  }
2291  }
2292  }
2293 
2294 
2295  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2296  void
2299  const LocalOrdinal localRow,
2300  local_inds_host_view_type &indices) const
2301  {
2302  const char tfecfFuncName[] = "getLocalRowView: ";
2303 
2304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2305  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2306  "currently stored as global indices, so we cannot return a view with "
2307  "local column indices, whether or not the graph has a column Map. If "
2308  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2309 
2310  const RowInfo rowInfo = getRowInfo (localRow);
2311  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2312  rowInfo.numEntries > 0) {
2313  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2314  rowInfo.numEntries,
2315  Access::ReadOnly);
2316  }
2317  else {
2318  // This does the right thing (reports an empty row) if the input
2319  // row is invalid.
2320  indices = local_inds_host_view_type();
2321  }
2322 
2323  if (debug_) {
2324  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2325  (static_cast<size_t> (indices.size ()) !=
2326  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2327  "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2328  localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2329  ". Please report this bug to the Tpetra developers.");
2330  }
2331  }
2332 
2333 
2334  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2335  void
2338  const GlobalOrdinal globalRow,
2339  global_inds_host_view_type &indices) const
2340  {
2341  const char tfecfFuncName[] = "getGlobalRowView: ";
2342 
2343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2344  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2345  "currently stored as local indices, so we cannot return a view with "
2346  "global column indices. Use getGlobalRowCopy() instead.");
2347 
2348  // This does the right thing (reports an empty row) if the input
2349  // row is invalid.
2350  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2351  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2352  rowInfo.numEntries > 0) {
2353  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2354  rowInfo.numEntries,
2355  Access::ReadOnly);
2356  }
2357  else {
2358  indices = typename global_inds_dualv_type::t_host::const_type();
2359  }
2360  if (debug_) {
2361  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2362  (static_cast<size_t> (indices.size ()) !=
2363  getNumEntriesInGlobalRow (globalRow),
2364  std::logic_error, "indices.size() = " << indices.extent(0)
2365  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2366  << getNumEntriesInGlobalRow (globalRow)
2367  << ". Please report this bug to the Tpetra developers.");
2368  }
2369  }
2370 
2371 
2372  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2373  void
2375  insertLocalIndices (const LocalOrdinal localRow,
2376  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2377  {
2378  const char tfecfFuncName[] = "insertLocalIndices: ";
2379 
2380  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2381  (! isFillActive (), std::runtime_error, "Fill must be active.");
2382  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2383  (isGloballyIndexed (), std::runtime_error,
2384  "Graph indices are global; use insertGlobalIndices().");
2385  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2386  (! hasColMap (), std::runtime_error,
2387  "Cannot insert local indices without a column Map.");
2388  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2389  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2390  "Local row index " << localRow << " is not in the row Map "
2391  "on the calling process.");
2392  if (! indicesAreAllocated ()) {
2393  allocateIndices (LocalIndices, verbose_);
2394  }
2395 
2396  if (debug_) {
2397  // In debug mode, if the graph has a column Map, test whether any
2398  // of the given column indices are not in the column Map. Keep
2399  // track of the invalid column indices so we can tell the user
2400  // about them.
2401  if (hasColMap ()) {
2402  using Teuchos::Array;
2403  using Teuchos::toString;
2404  using std::endl;
2405  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2406 
2407  const map_type& colMap = *colMap_;
2408  Array<LocalOrdinal> badColInds;
2409  bool allInColMap = true;
2410  for (size_type k = 0; k < indices.size (); ++k) {
2411  if (! colMap.isNodeLocalElement (indices[k])) {
2412  allInColMap = false;
2413  badColInds.push_back (indices[k]);
2414  }
2415  }
2416  if (! allInColMap) {
2417  std::ostringstream os;
2418  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2419  "entries in owned row " << localRow << ", at the following column "
2420  "indices: " << toString (indices) << "." << endl;
2421  os << "Of those, the following indices are not in the column Map on "
2422  "this process: " << toString (badColInds) << "." << endl << "Since "
2423  "the graph has a column Map already, it is invalid to insert entries "
2424  "at those locations.";
2425  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2426  }
2427  }
2428  }
2429 
2430  insertLocalIndicesImpl (localRow, indices);
2431 
2432  if (debug_) {
2433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2434  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2435  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2436  "! isLocallyIndexed() is true. Please report this bug to the "
2437  "Tpetra developers.");
2438  }
2439  }
2440 
2441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2442  void
2444  insertLocalIndices (const LocalOrdinal localRow,
2445  const LocalOrdinal numEnt,
2446  const LocalOrdinal inds[])
2447  {
2448  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2449  this->insertLocalIndices (localRow, indsT);
2450  }
2451 
2452 
2453  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2454  void
2456  insertGlobalIndices (const GlobalOrdinal gblRow,
2457  const LocalOrdinal numInputInds,
2458  const GlobalOrdinal inputGblColInds[])
2459  {
2460  typedef LocalOrdinal LO;
2461  const char tfecfFuncName[] = "insertGlobalIndices: ";
2462 
2463  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2464  (this->isLocallyIndexed (), std::runtime_error,
2465  "graph indices are local; use insertLocalIndices().");
2466  // This can't really be satisfied for now, because if we are
2467  // fillComplete(), then we are local. In the future, this may
2468  // change. However, the rule that modification require active
2469  // fill will not change.
2470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2471  (! this->isFillActive (), std::runtime_error,
2472  "You are not allowed to call this method if fill is not active. "
2473  "If fillComplete has been called, you must first call resumeFill "
2474  "before you may insert indices.");
2475  if (! indicesAreAllocated ()) {
2476  allocateIndices (GlobalIndices, verbose_);
2477  }
2478  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2479  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2480  if (debug_) {
2481  if (this->hasColMap ()) {
2482  using std::endl;
2483  const map_type& colMap = * (this->colMap_);
2484  // In a debug build, keep track of the nonowned ("bad") column
2485  // indices, so that we can display them in the exception
2486  // message. In a release build, just ditch the loop early if
2487  // we encounter a nonowned column index.
2488  std::vector<GlobalOrdinal> badColInds;
2489  bool allInColMap = true;
2490  for (LO k = 0; k < numInputInds; ++k) {
2491  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2492  allInColMap = false;
2493  badColInds.push_back (inputGblColInds[k]);
2494  }
2495  }
2496  if (! allInColMap) {
2497  std::ostringstream os;
2498  os << "You attempted to insert entries in owned row " << gblRow
2499  << ", at the following column indices: [";
2500  for (LO k = 0; k < numInputInds; ++k) {
2501  os << inputGblColInds[k];
2502  if (k + static_cast<LO> (1) < numInputInds) {
2503  os << ",";
2504  }
2505  }
2506  os << "]." << endl << "Of those, the following indices are not in "
2507  "the column Map on this process: [";
2508  for (size_t k = 0; k < badColInds.size (); ++k) {
2509  os << badColInds[k];
2510  if (k + size_t (1) < badColInds.size ()) {
2511  os << ",";
2512  }
2513  }
2514  os << "]." << endl << "Since the matrix has a column Map already, "
2515  "it is invalid to insert entries at those locations.";
2516  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2517  (true, std::invalid_argument, os.str ());
2518  }
2519  }
2520  } // debug_
2521  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2522  }
2523  else { // a nonlocal row
2524  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2525  numInputInds);
2526  }
2527  }
2528 
2529 
2530  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2531  void
2533  insertGlobalIndices (const GlobalOrdinal gblRow,
2534  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2535  {
2536  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2537  inputGblColInds.getRawPtr ());
2538  }
2539 
2540 
2541  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2542  void
2544  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2545  const GlobalOrdinal gblColInds[],
2546  const LocalOrdinal numGblColInds)
2547  {
2548  typedef LocalOrdinal LO;
2549  typedef GlobalOrdinal GO;
2550  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2551 
2552  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2553  (this->isLocallyIndexed (), std::runtime_error,
2554  "Graph indices are local; use insertLocalIndices().");
2555  // This can't really be satisfied for now, because if we are
2556  // fillComplete(), then we are local. In the future, this may
2557  // change. However, the rule that modification require active
2558  // fill will not change.
2559  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2560  (! this->isFillActive (), std::runtime_error,
2561  "You are not allowed to call this method if fill is not active. "
2562  "If fillComplete has been called, you must first call resumeFill "
2563  "before you may insert indices.");
2564  if (! indicesAreAllocated ()) {
2565  allocateIndices (GlobalIndices, verbose_);
2566  }
2567 
2568  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2569  // If we have a column Map, use it to filter the entries.
2570  if (! colMap_.is_null ()) {
2571  const map_type& colMap = * (this->colMap_);
2572 
2573  LO curOffset = 0;
2574  while (curOffset < numGblColInds) {
2575  // Find a sequence of input indices that are in the column Map
2576  // on the calling process. Doing a sequence at a time,
2577  // instead of one at a time, amortizes some overhead.
2578  LO endOffset = curOffset;
2579  for ( ; endOffset < numGblColInds; ++endOffset) {
2580  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2581  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2582  break; // first entry, in current sequence, not in the column Map
2583  }
2584  }
2585  // curOffset, endOffset: half-exclusive range of indices in
2586  // the column Map on the calling process. If endOffset ==
2587  // curOffset, the range is empty.
2588  const LO numIndInSeq = (endOffset - curOffset);
2589  if (numIndInSeq != 0) {
2590  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2591  numIndInSeq);
2592  }
2593  // Invariant before this line: Either endOffset ==
2594  // numGblColInds, or gblColInds[endOffset] is not in the
2595  // column Map on the calling process.
2596  curOffset = endOffset + 1;
2597  }
2598  }
2599  else {
2600  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2601  gblColInds_av.size ());
2602  }
2603  }
2604 
2605  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2606  void
2608  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2609  const GlobalOrdinal gblColInds[],
2610  const LocalOrdinal numGblColInds)
2611  {
2612  // This creates the std::vector if it doesn't exist yet.
2613  // std::map's operator[] does a lookup each time, so it's better
2614  // to pull nonlocals_[grow] out of the loop.
2615  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2616  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2617  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2618  // order to avoid duplicates. globalAssemble() sorts these
2619  // anyway.
2620  nonlocalRow.push_back (gblColInds[k]);
2621  }
2622  }
2623 
2624  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2625  void
2627  removeLocalIndices (LocalOrdinal lrow)
2628  {
2629  const char tfecfFuncName[] = "removeLocalIndices: ";
2630  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2631  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2633  isStorageOptimized (), std::runtime_error,
2634  "cannot remove indices after optimizeStorage() has been called.");
2635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2636  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2638  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2639  "Local row " << lrow << " is not in the row Map on the calling process.");
2640  if (! indicesAreAllocated ()) {
2641  allocateIndices (LocalIndices, verbose_);
2642  }
2643 
2644  if (k_numRowEntries_.extent (0) != 0) {
2645  this->k_numRowEntries_(lrow) = 0;
2646  }
2647 
2648  if (debug_) {
2649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2650  (getNumEntriesInLocalRow (lrow) != 0 ||
2651  ! indicesAreAllocated () ||
2652  ! isLocallyIndexed (), std::logic_error,
2653  "Violated stated post-conditions. Please contact Tpetra team.");
2654  }
2655  }
2656 
2657 
2658  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2659  void
2661  setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2662  const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2663  {
2664  using ProfilingRegion=Details::ProfilingRegion;
2665  ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2666  const char tfecfFuncName[] = "setAllIndices: ";
2667  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2668  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2669  "The graph must have a column Map before you may call this method.");
2670  LocalOrdinal numLocalRows = this->getLocalNumRows ();
2671  {
2672  LocalOrdinal rowPtrLen = rowPointers.size();
2673  if(numLocalRows == 0) {
2674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2675  rowPtrLen != 0 && rowPtrLen != 1,
2676  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2677  }
2678  else {
2679  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2680  rowPtrLen != numLocalRows + 1,
2681  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2682  " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2683  }
2684  }
2685 
2686  if(debug_) {
2687  using exec_space = typename local_graph_device_type::execution_space;
2688  int columnsOutOfBounds = 0;
2689  local_ordinal_type numLocalCols = this->getLocalNumCols();
2690  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2691  KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2692  {
2693  if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2694  lOutOfBounds++;
2695  }, columnsOutOfBounds);
2696  int globalColsOutOfBounds= 0;
2697  auto comm = this->getComm();
2698  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2699  Teuchos::outArg (globalColsOutOfBounds));
2700  if (globalColsOutOfBounds)
2701  {
2702  std::string message;
2703  if (columnsOutOfBounds)
2704  {
2705  //Only print message from ranks with the problem
2706  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2707  }
2708  Details::gathervPrint(std::cout, message, *comm);
2709  throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2710  }
2711  }
2712 
2713  if (debug_ && this->isSorted()) {
2714  // Verify that the local indices are actually sorted
2715  int notSorted = 0;
2716  using exec_space = typename local_graph_device_type::execution_space;
2717  using size_type = typename local_graph_device_type::size_type;
2718  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2719  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2720  {
2721  size_type rowBegin = rowPointers(i);
2722  size_type rowEnd = rowPointers(i + 1);
2723  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2724  {
2725  if(columnIndices(j - 1) > columnIndices(j))
2726  {
2727  lNotSorted = 1;
2728  }
2729  }
2730  }, notSorted);
2731  //All-reduce notSorted to avoid rank divergence
2732  int globalNotSorted = 0;
2733  auto comm = this->getComm();
2734  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2735  Teuchos::outArg (globalNotSorted));
2736  if (globalNotSorted)
2737  {
2738  std::string message;
2739  if (notSorted)
2740  {
2741  //Only print message from ranks with the problem
2742  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2743  }
2744  Details::gathervPrint(std::cout, message, *comm);
2745  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2746  }
2747  }
2748 
2749  indicesAreAllocated_ = true;
2750  indicesAreLocal_ = true;
2751  indicesAreSorted_ = true;
2752  noRedundancies_ = true;
2753  lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2754  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2755  setRowPtrs(rowPointers);
2756 
2757  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2758 
2759  // Storage MUST be packed, since the interface doesn't give any
2760  // way to indicate any extra space at the end of each row.
2761  storageStatus_ = Details::STORAGE_1D_PACKED;
2762 
2763  // These normally get cleared out at the end of allocateIndices.
2764  // It makes sense to clear them out here, because at the end of
2765  // this method, the graph is allocated on the calling process.
2766  numAllocForAllRows_ = 0;
2767  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2768 
2769  checkInternalState ();
2770  }
2771 
2772 
2773  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2774  void
2776  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2777  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2778  {
2779  using Kokkos::View;
2780  typedef typename local_graph_device_type::row_map_type row_map_type;
2781  typedef typename row_map_type::array_layout layout_type;
2782  typedef typename row_map_type::non_const_value_type row_offset_type;
2783  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2784  Kokkos::MemoryUnmanaged> input_view_type;
2785  typedef typename row_map_type::non_const_type nc_row_map_type;
2786 
2787  const size_t size = static_cast<size_t> (rowPointers.size ());
2788  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2789  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2790 
2791  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2792 
2793  // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
2794 #ifdef KOKKOS_ENABLE_CXX17
2795  if constexpr (same) { // size_t == row_offset_type
2796  using lexecution_space = typename device_type::execution_space;
2797  Kokkos::deep_copy (lexecution_space(),
2798  ptr_rot,
2799  ptr_in);
2800  }
2801 #else
2802  if (same) { // size_t == row_offset_type
2803  // This compile-time logic ensures that the compiler never sees
2804  // an assignment of View<row_offset_type*, ...> to View<size_t*,
2805  // ...> unless size_t == row_offset_type.
2806  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2807  // DEEP_COPY REVIEW - HOST-TO-DEVICE
2809  Kokkos::Impl::if_c<same,
2810  nc_row_map_type,
2811  input_view_type>::select (ptr_rot, ptr_decoy),
2812  ptr_in);
2813  }
2814 #endif
2815  else { // size_t != row_offset_type
2816  // CudaUvmSpace != HostSpace, so this will be false in that case.
2817  constexpr bool inHostMemory =
2818  std::is_same<typename row_map_type::memory_space,
2819  Kokkos::HostSpace>::value;
2820  if (inHostMemory) {
2821  // Copy (with cast from size_t to row_offset_type, with bounds
2822  // checking if necessary) to ptr_rot.
2823  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2824  }
2825  else { // Copy input row offsets to device first.
2826  //
2827  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2828  // execution space would avoid the double copy.
2829  //
2830  View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2831 
2832  // DEEP_COPY REVIEW - NOT TESTED
2833  Kokkos::deep_copy (ptr_st, ptr_in);
2834  // Copy on device (casting from size_t to row_offset_type,
2835  // with bounds checking if necessary) to ptr_rot. This
2836  // executes in the output View's execution space, which is the
2837  // same as execution_space.
2838  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2839  }
2840  }
2841 
2842  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2843  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2844  setAllIndices (ptr_rot, k_ind);
2845  }
2846 
2847 
2848  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2849  void
2852  {
2853  using Teuchos::Comm;
2854  using Teuchos::outArg;
2855  using Teuchos::RCP;
2856  using Teuchos::rcp;
2857  using Teuchos::REDUCE_MAX;
2858  using Teuchos::REDUCE_MIN;
2859  using Teuchos::reduceAll;
2860  using std::endl;
2861  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2862  using LO = local_ordinal_type;
2863  using GO = global_ordinal_type;
2864  using size_type = typename Teuchos::Array<GO>::size_type;
2865  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2866 
2867  std::unique_ptr<std::string> prefix;
2868  if (verbose_) {
2869  prefix = this->createPrefix("CrsGraph", "globalAssemble");
2870  std::ostringstream os;
2871  os << *prefix << "Start" << endl;
2872  std::cerr << os.str();
2873  }
2874  RCP<const Comm<int> > comm = getComm ();
2875 
2876  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2877  (! isFillActive (), std::runtime_error, "Fill must be active before "
2878  "you may call this method.");
2879 
2880  const size_t myNumNonlocalRows = this->nonlocals_.size ();
2881 
2882  // If no processes have nonlocal rows, then we don't have to do
2883  // anything. Checking this is probably cheaper than constructing
2884  // the Map of nonlocal rows (see below) and noticing that it has
2885  // zero global entries.
2886  {
2887  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2888  int someoneHasNonlocalRows = 0;
2889  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2890  outArg (someoneHasNonlocalRows));
2891  if (someoneHasNonlocalRows == 0) {
2892  if (verbose_) {
2893  std::ostringstream os;
2894  os << *prefix << "Done: No nonlocal rows" << endl;
2895  std::cerr << os.str();
2896  }
2897  return;
2898  }
2899  else if (verbose_) {
2900  std::ostringstream os;
2901  os << *prefix << "At least 1 process has nonlocal rows"
2902  << endl;
2903  std::cerr << os.str();
2904  }
2905  }
2906 
2907  // 1. Create a list of the "nonlocal" rows on each process. this
2908  // requires iterating over nonlocals_, so while we do this,
2909  // deduplicate the entries and get a count for each nonlocal
2910  // row on this process.
2911  // 2. Construct a new row Map corresponding to those rows. This
2912  // Map is likely overlapping. We know that the Map is not
2913  // empty on all processes, because the above all-reduce and
2914  // return exclude that case.
2915 
2916  RCP<const map_type> nonlocalRowMap;
2917  // Keep this for CrsGraph's constructor.
2918  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2919  {
2920  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2921  size_type curPos = 0;
2922  for (auto mapIter = this->nonlocals_.begin ();
2923  mapIter != this->nonlocals_.end ();
2924  ++mapIter, ++curPos) {
2925  myNonlocalGblRows[curPos] = mapIter->first;
2926  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2927  std::sort (gblCols.begin (), gblCols.end ());
2928  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2929  gblCols.erase (vecLast, gblCols.end ());
2930  numEntPerNonlocalRow[curPos] = gblCols.size ();
2931  }
2932 
2933  // Currently, Map requires that its indexBase be the global min
2934  // of all its global indices. Map won't compute this for us, so
2935  // we must do it. If our process has no nonlocal rows, set the
2936  // "min" to the max possible GO value. This ensures that if
2937  // some process has at least one nonlocal row, then it will pick
2938  // that up as the min. We know that at least one process has a
2939  // nonlocal row, since the all-reduce and return at the top of
2940  // this method excluded that case.
2941  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2942  {
2943  auto iter = std::min_element (myNonlocalGblRows.begin (),
2944  myNonlocalGblRows.end ());
2945  if (iter != myNonlocalGblRows.end ()) {
2946  myMinNonlocalGblRow = *iter;
2947  }
2948  }
2949  GO gblMinNonlocalGblRow = 0;
2950  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2951  outArg (gblMinNonlocalGblRow));
2952  const GO indexBase = gblMinNonlocalGblRow;
2953  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2954  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
2955  }
2956 
2957  if (verbose_) {
2958  std::ostringstream os;
2959  os << *prefix << "nonlocalRowMap->getIndexBase()="
2960  << nonlocalRowMap->getIndexBase() << endl;
2961  std::cerr << os.str();
2962  }
2963 
2964  // 3. Use the column indices for each nonlocal row, as stored in
2965  // nonlocals_, to construct a CrsGraph corresponding to
2966  // nonlocal rows. We need, but we have, exact counts of the
2967  // number of entries in each nonlocal row.
2968 
2969  RCP<crs_graph_type> nonlocalGraph =
2970  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2971  {
2972  size_type curPos = 0;
2973  for (auto mapIter = this->nonlocals_.begin ();
2974  mapIter != this->nonlocals_.end ();
2975  ++mapIter, ++curPos) {
2976  const GO gblRow = mapIter->first;
2977  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2978  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
2979  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
2980  }
2981  }
2982  if (verbose_) {
2983  std::ostringstream os;
2984  os << *prefix << "Built nonlocal graph" << endl;
2985  std::cerr << os.str();
2986  }
2987  // There's no need to fill-complete the nonlocals graph.
2988  // We just use it as a temporary container for the Export.
2989 
2990  // 4. If the original row Map is one to one, then we can Export
2991  // directly from nonlocalGraph into this. Otherwise, we have
2992  // to create a temporary graph with a one-to-one row Map,
2993  // Export into that, then Import from the temporary graph into
2994  // *this.
2995 
2996  auto origRowMap = this->getRowMap ();
2997  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
2998 
2999  if (origRowMapIsOneToOne) {
3000  if (verbose_) {
3001  std::ostringstream os;
3002  os << *prefix << "Original row Map is 1-to-1" << endl;
3003  std::cerr << os.str();
3004  }
3005  export_type exportToOrig (nonlocalRowMap, origRowMap);
3006  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3007  // We're done at this point!
3008  }
3009  else {
3010  if (verbose_) {
3011  std::ostringstream os;
3012  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3013  std::cerr << os.str();
3014  }
3015  // If you ask a Map whether it is one to one, it does some
3016  // communication and stashes intermediate results for later use
3017  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3018  // much more then the original cost of calling isOneToOne.
3019  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3020  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3021 
3022  // Create a temporary graph with the one-to-one row Map.
3023  //
3024  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3025  // row, to avoid reallocation during the Export operation.
3026  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3027 
3028  // Export from graph of nonlocals into the temp one-to-one graph.
3029  if (verbose_) {
3030  std::ostringstream os;
3031  os << *prefix << "Export nonlocal graph" << endl;
3032  std::cerr << os.str();
3033  }
3034  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3035 
3036  // We don't need the graph of nonlocals anymore, so get rid of
3037  // it, to keep the memory high-water mark down.
3038  nonlocalGraph = Teuchos::null;
3039 
3040  // Import from the one-to-one graph to the original graph.
3041  import_type importToOrig (oneToOneRowMap, origRowMap);
3042  if (verbose_) {
3043  std::ostringstream os;
3044  os << *prefix << "Import nonlocal graph" << endl;
3045  std::cerr << os.str();
3046  }
3047  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3048  }
3049 
3050  // It's safe now to clear out nonlocals_, since we've already
3051  // committed side effects to *this. The standard idiom for
3052  // clearing a Container like std::map, is to swap it with an empty
3053  // Container and let the swapped Container fall out of scope.
3054  decltype (this->nonlocals_) newNonlocals;
3055  std::swap (this->nonlocals_, newNonlocals);
3056 
3057  checkInternalState ();
3058  if (verbose_) {
3059  std::ostringstream os;
3060  os << *prefix << "Done" << endl;
3061  std::cerr << os.str();
3062  }
3063  }
3064 
3065 
3066  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3067  void
3069  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3070  {
3071  clearGlobalConstants();
3072  if (params != Teuchos::null) this->setParameterList (params);
3073  // either still sorted/merged or initially sorted/merged
3074  indicesAreSorted_ = true;
3075  noRedundancies_ = true;
3076  fillComplete_ = false;
3077  }
3078 
3079 
3080  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3081  void
3083  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3084  {
3085  // If the graph already has domain and range Maps, don't clobber
3086  // them. If it doesn't, use the current row Map for both the
3087  // domain and range Maps.
3088  //
3089  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3090  // column Map, and column indices are inserted which are not in
3091  // the row Map on any process, this will cause troubles. However,
3092  // that is not a common case for most applications that we
3093  // encounter, and checking for it might require more
3094  // communication.
3095  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3096  if (domMap.is_null ()) {
3097  domMap = this->getRowMap ();
3098  }
3099  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3100  if (ranMap.is_null ()) {
3101  ranMap = this->getRowMap ();
3102  }
3103  this->fillComplete (domMap, ranMap, params);
3104  }
3105 
3106 
3107  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3108  void
3110  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3111  const Teuchos::RCP<const map_type>& rangeMap,
3112  const Teuchos::RCP<Teuchos::ParameterList>& params)
3113  {
3114  using std::endl;
3115  const char tfecfFuncName[] = "fillComplete: ";
3116  const bool verbose = verbose_;
3117 
3118  std::unique_ptr<std::string> prefix;
3119  if (verbose) {
3120  prefix = this->createPrefix("CrsGraph", "fillComplete");
3121  std::ostringstream os;
3122  os << *prefix << "Start" << endl;
3123  std::cerr << os.str();
3124  }
3125 
3126  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3127  (! isFillActive () || isFillComplete (), std::runtime_error,
3128  "Graph fill state must be active (isFillActive() "
3129  "must be true) before calling fillComplete().");
3130 
3131  const int numProcs = getComm ()->getSize ();
3132 
3133  //
3134  // Read and set parameters
3135  //
3136 
3137  // Does the caller want to sort remote GIDs (within those owned by
3138  // the same process) in makeColMap()?
3139  if (! params.is_null ()) {
3140  if (params->isParameter ("sort column map ghost gids")) {
3141  sortGhostsAssociatedWithEachProcessor_ =
3142  params->get<bool> ("sort column map ghost gids",
3143  sortGhostsAssociatedWithEachProcessor_);
3144  }
3145  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3146  sortGhostsAssociatedWithEachProcessor_ =
3147  params->get<bool> ("Sort column Map ghost GIDs",
3148  sortGhostsAssociatedWithEachProcessor_);
3149  }
3150  }
3151 
3152  // If true, the caller promises that no process did nonlocal
3153  // changes since the last call to fillComplete.
3154  bool assertNoNonlocalInserts = false;
3155  if (! params.is_null ()) {
3156  assertNoNonlocalInserts =
3157  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3158  }
3159 
3160  //
3161  // Allocate indices, if they haven't already been allocated
3162  //
3163  if (! indicesAreAllocated ()) {
3164  if (hasColMap ()) {
3165  // We have a column Map, so use local indices.
3166  allocateIndices (LocalIndices, verbose);
3167  } else {
3168  // We don't have a column Map, so use global indices.
3169  allocateIndices (GlobalIndices, verbose);
3170  }
3171  }
3172 
3173  //
3174  // Do global assembly, if requested and if the communicator
3175  // contains more than one process.
3176  //
3177  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3178  if (mayNeedGlobalAssemble) {
3179  // This first checks if we need to do global assembly.
3180  // The check costs a single all-reduce.
3181  globalAssemble ();
3182  }
3183  else {
3184  const size_t numNonlocals = nonlocals_.size();
3185  if (verbose) {
3186  std::ostringstream os;
3187  os << *prefix << "Do not need to call globalAssemble; "
3188  "assertNoNonlocalInserts="
3189  << (assertNoNonlocalInserts ? "true" : "false")
3190  << "numProcs=" << numProcs
3191  << ", nonlocals_.size()=" << numNonlocals << endl;
3192  std::cerr << os.str();
3193  }
3194  const int lclNeededGlobalAssemble =
3195  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3196  if (lclNeededGlobalAssemble != 0 && verbose) {
3197  std::ostringstream os;
3198  os << *prefix;
3199  Details::Impl::verbosePrintMap(
3200  os, nonlocals_.begin(), nonlocals_.end(),
3201  nonlocals_.size(), "nonlocals_");
3202  std::cerr << os.str() << endl;
3203  }
3204 
3205  if (debug_) {
3206  auto map = this->getMap();
3207  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3208  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3209  if (! comm.is_null()) {
3210  using Teuchos::REDUCE_MAX;
3211  using Teuchos::reduceAll;
3212  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3213  Teuchos::outArg(gblNeededGlobalAssemble));
3214  }
3215  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3216  (gblNeededGlobalAssemble != 0, std::runtime_error,
3217  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3218  "least one process in the CrsGraph's communicator. This "
3219  "means either that you incorrectly set the "
3220  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3221  "or that you inserted invalid entries. "
3222  "Rerun with the environment variable TPETRA_VERBOSE="
3223  "CrsGraph set to see the entries of nonlocals_ on every "
3224  "MPI process (WARNING: lots of output).");
3225  }
3226  else {
3227  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3228  (lclNeededGlobalAssemble != 0, std::runtime_error,
3229  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3230  "calling process. This means either that you incorrectly "
3231  "set the \"No Nonlocal Changes\" fillComplete parameter "
3232  "to true, or that you inserted invalid entries. "
3233  "Rerun with the environment "
3234  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3235  "of nonlocals_ on every MPI process (WARNING: lots of "
3236  "output).");
3237  }
3238  }
3239 
3240  // Set domain and range Map. This may clear the Import / Export
3241  // objects if the new Maps differ from any old ones.
3242  setDomainRangeMaps (domainMap, rangeMap);
3243 
3244  // If the graph does not already have a column Map (either from
3245  // the user constructor calling the version of the constructor
3246  // that takes a column Map, or from a previous fillComplete call),
3247  // then create it.
3248  Teuchos::Array<int> remotePIDs (0);
3249  const bool mustBuildColMap = ! this->hasColMap ();
3250  if (mustBuildColMap) {
3251  this->makeColMap (remotePIDs); // resized on output
3252  }
3253 
3254  // Make indices local, if they aren't already.
3255  // The method doesn't do any work if the indices are already local.
3256  const std::pair<size_t, std::string> makeIndicesLocalResult =
3257  this->makeIndicesLocal(verbose);
3258 
3259  if (debug_) {
3260  using Details::gathervPrint;
3261  using Teuchos::RCP;
3262  using Teuchos::REDUCE_MIN;
3263  using Teuchos::reduceAll;
3264  using Teuchos::outArg;
3265 
3266  RCP<const map_type> map = this->getMap ();
3267  RCP<const Teuchos::Comm<int> > comm;
3268  if (! map.is_null ()) {
3269  comm = map->getComm ();
3270  }
3271  if (comm.is_null ()) {
3272  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3273  (makeIndicesLocalResult.first != 0, std::runtime_error,
3274  makeIndicesLocalResult.second);
3275  }
3276  else {
3277  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3278  int gblSuccess = 0; // output argument
3279  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3280  if (gblSuccess != 1) {
3281  std::ostringstream os;
3282  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3283  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3284  (true, std::runtime_error, os.str ());
3285  }
3286  }
3287  }
3288  else {
3289  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3290  // the error state to makeImportExport or
3291  // computeGlobalConstants, which may do all-reduces and thus may
3292  // have the opportunity to communicate that error state.
3293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3294  (makeIndicesLocalResult.first != 0, std::runtime_error,
3295  makeIndicesLocalResult.second);
3296  }
3297 
3298  // If this process has no indices, then CrsGraph considers it
3299  // already trivially sorted and merged. Thus, this method need
3300  // not be called on all processes in the row Map's communicator.
3301  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3302 
3303  // Make Import and Export objects, if they haven't been made
3304  // already. If we made a column Map above, reuse information from
3305  // that process to avoid communiation in the Import setup.
3306  this->makeImportExport (remotePIDs, mustBuildColMap);
3307 
3308  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3309  this->fillLocalGraph (params);
3310 
3311  const bool callComputeGlobalConstants = params.get () == nullptr ||
3312  params->get ("compute global constants", true);
3313  if (callComputeGlobalConstants) {
3314  this->computeGlobalConstants ();
3315  }
3316  else {
3317  this->computeLocalConstants ();
3318  }
3319  this->fillComplete_ = true;
3320  this->checkInternalState ();
3321 
3322  if (verbose) {
3323  std::ostringstream os;
3324  os << *prefix << "Done" << endl;
3325  std::cerr << os.str();
3326  }
3327  }
3328 
3329 
3330  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3331  void
3333  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3334  const Teuchos::RCP<const map_type>& rangeMap,
3335  const Teuchos::RCP<const import_type>& importer,
3336  const Teuchos::RCP<const export_type>& exporter,
3337  const Teuchos::RCP<Teuchos::ParameterList>& params)
3338  {
3339  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3340 #ifdef HAVE_TPETRA_MMM_TIMINGS
3341  std::string label;
3342  if(!params.is_null())
3343  label = params->get("Timer Label",label);
3344  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3345  using Teuchos::TimeMonitor;
3346  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3347 #endif
3348 
3349 
3350  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3351  domainMap.is_null () || rangeMap.is_null (),
3352  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3354  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3355  "call this method unless the graph has a column Map.");
3356  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3357  getLocalNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3358  std::runtime_error, "The calling process has getLocalNumRows() = "
3359  << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3360  "been set.");
3361  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3362  static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getLocalNumRows () + 1,
3363  std::runtime_error, "The row offsets array has length " <<
3364  rowPtrsUnpacked_host_.extent (0) << " != getLocalNumRows()+1 = " <<
3365  (getLocalNumRows () + 1) << ".");
3366 
3367  // Note: We don't need to do the following things which are normally done in fillComplete:
3368  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3369 
3370  // Constants from allocateIndices
3371  //
3372  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3373  // away once the graph is allocated. expertStaticFillComplete
3374  // either presumes that the graph is allocated, or "allocates" it.
3375  //
3376  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3377  // version of CrsGraph is to allocate in the constructor, not
3378  // lazily on first insert. That will make both
3379  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3380  numAllocForAllRows_ = 0;
3381  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3382  indicesAreAllocated_ = true;
3383 
3384  // Constants from makeIndicesLocal
3385  //
3386  // The graph has a column Map, so its indices had better be local.
3387  indicesAreLocal_ = true;
3388  indicesAreGlobal_ = false;
3389 
3390  // set domain/range map: may clear the import/export objects
3391 #ifdef HAVE_TPETRA_MMM_TIMINGS
3392  MM = Teuchos::null;
3393  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3394 #endif
3395  setDomainRangeMaps (domainMap, rangeMap);
3396 
3397  // Presume the user sorted and merged the arrays first
3398  indicesAreSorted_ = true;
3399  noRedundancies_ = true;
3400 
3401  // makeImportExport won't create a new importer/exporter if I set one here first.
3402 #ifdef HAVE_TPETRA_MMM_TIMINGS
3403  MM = Teuchos::null;
3404  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3405 #endif
3406 
3407  importer_ = Teuchos::null;
3408  exporter_ = Teuchos::null;
3409  if (importer != Teuchos::null) {
3410  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3411  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3412  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3413  std::invalid_argument,": importer does not match matrix maps.");
3414  importer_ = importer;
3415 
3416  }
3417 
3418 #ifdef HAVE_TPETRA_MMM_TIMINGS
3419  MM = Teuchos::null;
3420  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3421 #endif
3422 
3423  if (exporter != Teuchos::null) {
3424  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3425  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3426  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3427  std::invalid_argument,": exporter does not match matrix maps.");
3428  exporter_ = exporter;
3429  }
3430 
3431 #ifdef HAVE_TPETRA_MMM_TIMINGS
3432  MM = Teuchos::null;
3433  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3434 #endif
3435  Teuchos::Array<int> remotePIDs (0); // unused output argument
3436  this->makeImportExport (remotePIDs, false);
3437 
3438 #ifdef HAVE_TPETRA_MMM_TIMINGS
3439  MM = Teuchos::null;
3440  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3441 #endif
3442  this->fillLocalGraph (params);
3443 
3444  const bool callComputeGlobalConstants = params.get () == nullptr ||
3445  params->get ("compute global constants", true);
3446 
3447  if (callComputeGlobalConstants) {
3448 #ifdef HAVE_TPETRA_MMM_TIMINGS
3449  MM = Teuchos::null;
3450  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3451 #endif // HAVE_TPETRA_MMM_TIMINGS
3452  this->computeGlobalConstants ();
3453  }
3454  else {
3455 #ifdef HAVE_TPETRA_MMM_TIMINGS
3456  MM = Teuchos::null;
3457  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3458 #endif // HAVE_TPETRA_MMM_TIMINGS
3459  this->computeLocalConstants ();
3460  }
3461 
3462  fillComplete_ = true;
3463 
3464 #ifdef HAVE_TPETRA_MMM_TIMINGS
3465  MM = Teuchos::null;
3466  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3467 #endif
3468  checkInternalState ();
3469  }
3470 
3471 
3472  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3473  void
3475  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3476  {
3478  typedef decltype (k_numRowEntries_) row_entries_type;
3479  typedef typename local_graph_device_type::row_map_type row_map_type;
3480  typedef typename row_map_type::non_const_type non_const_row_map_type;
3481  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3482  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3483  "expertStaticFillComplete): ";
3484  const size_t lclNumRows = this->getLocalNumRows ();
3485 
3486  // This method's goal is to fill in the two arrays (compressed
3487  // sparse row format) that define the sparse graph's structure.
3488 
3489  bool requestOptimizedStorage = true;
3490  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3491  requestOptimizedStorage = false;
3492  }
3493 
3494  // The graph's column indices are currently stored in a 1-D
3495  // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3496  // in k_lclInds1D_.
3497 
3498  if (debug_) {
3499  // The graph's array of row offsets must already be allocated.
3500  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3501  (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3502  "k_rowPtrs_ has size zero, but shouldn't");
3503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3504  (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3505  "rowPtrsUnpacked_host_.extent(0) = "
3506  << rowPtrsUnpacked_host_.extent (0) << " != (lclNumRows + 1) = "
3507  << (lclNumRows + 1) << ".");
3508  const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3509  const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3510  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3511  (numOffsets != 0 &&
3512  lclIndsUnpacked_wdv.extent (0) != valToCheck,
3513  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3514  " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3515  << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3516  << ".");
3517  }
3518 
3519  size_t allocSize = 0;
3520  try {
3521  allocSize = this->getLocalAllocationSize ();
3522  }
3523  catch (std::logic_error& e) {
3524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3525  (true, std::logic_error, "getLocalAllocationSize threw "
3526  "std::logic_error: " << e.what ());
3527  }
3528  catch (std::runtime_error& e) {
3529  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530  (true, std::runtime_error, "getLocalAllocationSize threw "
3531  "std::runtime_error: " << e.what ());
3532  }
3533  catch (std::exception& e) {
3534  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3535  (true, std::runtime_error, "getLocalAllocationSize threw "
3536  "std::exception: " << e.what ());
3537  }
3538  catch (...) {
3539  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3540  (true, std::runtime_error, "getLocalAllocationSize threw "
3541  "an exception not a subclass of std::exception.");
3542  }
3543 
3544  if (this->getLocalNumEntries () != allocSize) {
3545  // Use the nonconst version of row_map_type for ptr_d, because
3546  // the latter is const and we need to modify ptr_d here.
3547  non_const_row_map_type ptr_d;
3548  row_map_type ptr_d_const;
3549 
3550  // The graph's current 1-D storage is "unpacked." This means
3551  // the row offsets may differ from what the final row offsets
3552  // should be. This could happen, for example, if the user set
3553  // an upper bound on the number of entries in each row, but
3554  // didn't fill all those entries.
3555 
3556  if (debug_) {
3557  if (rowPtrsUnpacked_host_.extent (0) != 0) {
3558  const size_t numOffsets =
3559  static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3560  const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3561  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3562  (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3563  std::logic_error, "(Unpacked branch) Before allocating "
3564  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3565  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3566  << lclIndsUnpacked_wdv.extent (0) << ".");
3567  }
3568  }
3569 
3570  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3571  // array of valid entry counts per row (k_numRowEntries_).
3572 
3573  // Total number of entries in the matrix on the calling
3574  // process. We will compute this in the loop below. It's
3575  // cheap to compute and useful as a sanity check.
3576  size_t lclTotalNumEntries = 0;
3577  {
3578  // Allocate the packed row offsets array.
3579  ptr_d =
3580  non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3581  ptr_d_const = ptr_d;
3582 
3583  // It's ok that k_numRowEntries_ is a host View; the
3584  // function can handle this.
3585  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3586  if (debug_) {
3587  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3588  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3589  std::logic_error, "(Unpacked branch) "
3590  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3591  << " != getLocalNumRows()=" << lclNumRows << "");
3592  }
3593 
3594  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3595 
3596  if (debug_) {
3597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3598  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3599  std::logic_error, "(Unpacked branch) After allocating "
3600  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3601  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3602  const auto valToCheck =
3603  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3604  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3605  (valToCheck != lclTotalNumEntries, std::logic_error,
3606  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3607  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3608  << ") = " << valToCheck << " != total number of entries "
3609  "on the calling process = " << lclTotalNumEntries
3610  << ".");
3611  }
3612  }
3613 
3614  // Allocate the array of packed column indices.
3615  lclinds_1d_type ind_d =
3616  lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3617 
3618  // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3619  // them, using the packed row offsets array ptr_d that we
3620  // created above.
3621  //
3622  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3623  // CrsMatrix?), we need to keep around the unpacked row
3624  // offsets and column indices.
3625 
3626  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3627  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3628  typedef pack_functor<
3629  typename local_graph_device_type::entries_type::non_const_type,
3630  typename local_inds_dualv_type::t_dev::const_type,
3631  row_map_type,
3632  typename local_graph_device_type::row_map_type> inds_packer_type;
3633  inds_packer_type f (ind_d,
3634  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3635  ptr_d, rowPtrsUnpacked_dev_);
3636  {
3637  typedef typename decltype (ind_d)::execution_space exec_space;
3638  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3639  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3640  }
3641 
3642  if (debug_) {
3643  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3644  (ptr_d.extent (0) == 0, std::logic_error,
3645  "(\"Optimize Storage\"=true branch) After packing, "
3646  "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
3647  "never allocated.");
3648  if (ptr_d.extent (0) != 0) {
3649  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3650  const auto valToCheck =
3651  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3652  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3653  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3654  std::logic_error, "(\"Optimize Storage\"=true branch) "
3655  "After packing, ptr_d(" << (numOffsets-1) << ")="
3656  << valToCheck << " != ind_d.extent(0)="
3657  << ind_d.extent(0) << ".");
3658  }
3659  }
3660  // Build the local graph.
3661  if (requestOptimizedStorage)
3662  setRowPtrs(ptr_d_const);
3663  else
3664  setRowPtrsPacked(ptr_d_const);
3665  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3666  }
3667  else { // We don't have to pack, so just set the pointers.
3668  // Note: The setRowPtrsPacked call has an aditional memcpy to host that we want to avoid
3669  rowPtrsPacked_dev_ = rowPtrsUnpacked_dev_;
3670  rowPtrsPacked_host_ = rowPtrsUnpacked_host_;
3671  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3672 
3673  if (debug_) {
3674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3675  (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
3676  "(\"Optimize Storage\"=false branch) "
3677  "rowPtrsPacked_dev_.extent(0) = 0. "
3678  "This probably means that "
3679  "k_rowPtrs_ was never allocated.");
3680  if (rowPtrsPacked_dev_.extent (0) != 0) {
3681  const size_t numOffsets =
3682  static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3683  const size_t valToCheck =
3684  rowPtrsPacked_host_(numOffsets - 1);
3685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3686  (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3687  std::logic_error, "(\"Optimize Storage\"=false branch) "
3688  "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3689  << valToCheck
3690  << " != lclIndsPacked_wdv.extent(0)="
3691  << lclIndsPacked_wdv.extent (0) << ".");
3692  }
3693  }
3694  }
3695 
3696  if (debug_) {
3697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3698  (static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
3699  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3700  rowPtrsPacked_dev_.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3701  << ".");
3702  if (rowPtrsPacked_dev_.extent (0) != 0) {
3703  const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3704  const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
3705  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3706  (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3707  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3708  << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3709  << lclIndsPacked_wdv.extent (0) << ".");
3710  }
3711  }
3712 
3713  if (requestOptimizedStorage) {
3714  // With optimized storage, we don't need to store
3715  // the array of row entry counts.
3716 
3717  // Free graph data structures that are only needed for
3718  // unpacked 1-D storage.
3719  k_numRowEntries_ = row_entries_type ();
3720 
3721  // Keep the new 1-D packed allocations.
3722  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3723 
3724  storageStatus_ = Details::STORAGE_1D_PACKED;
3725  }
3726 
3727  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3728  }
3729 
3730  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3731  void
3733  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3734  {
3735  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3736  //
3737  // FIXME (mfh 18 Aug 2014) This will break if the calling process
3738  // has no entries, because in that case, currently it is neither
3739  // locally nor globally indexed. This will change once we get rid
3740  // of lazy allocation (so that the constructor allocates indices
3741  // and therefore commits to local vs. global).
3742  const char tfecfFuncName[] = "replaceColMap: ";
3743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3744  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3745  "Requires matching maps and non-static graph.");
3746  colMap_ = newColMap;
3747  }
3748 
3749  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3750  void
3752  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3753  const Teuchos::RCP<const import_type>& newImport,
3754  const bool sortIndicesInEachRow)
3755  {
3756  using Teuchos::REDUCE_MIN;
3757  using Teuchos::reduceAll;
3758  using Teuchos::RCP;
3759  typedef GlobalOrdinal GO;
3760  typedef LocalOrdinal LO;
3761  typedef typename local_inds_dualv_type::t_host col_inds_type;
3762  const char tfecfFuncName[] = "reindexColumns: ";
3763 
3764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3765  isFillComplete (), std::runtime_error, "The graph is fill complete "
3766  "(isFillComplete() returns true). You must call resumeFill() before "
3767  "you may call this method.");
3768 
3769  // mfh 19 Aug 2014: This method does NOT redistribute data; it
3770  // doesn't claim to do the work of an Import or Export. This
3771  // means that for all processes, the calling process MUST own all
3772  // column indices, in both the old column Map (if it exists) and
3773  // the new column Map. We check this via an all-reduce.
3774  //
3775  // Some processes may be globally indexed, others may be locally
3776  // indexed, and others (that have no graph entries) may be
3777  // neither. This method will NOT change the graph's current
3778  // state. If it's locally indexed, it will stay that way, and
3779  // vice versa. It would easy to add an option to convert indices
3780  // from global to local, so as to save a global-to-local
3781  // conversion pass. However, we don't do this here. The intended
3782  // typical use case is that the graph already has a column Map and
3783  // is locally indexed, and this is the case for which we optimize.
3784 
3785  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3786 
3787  // Attempt to convert indices to the new column Map's version of
3788  // local. This will fail if on the calling process, the graph has
3789  // indices that are not on that process in the new column Map.
3790  // After the local conversion attempt, we will do an all-reduce to
3791  // see if any processes failed.
3792 
3793  // If this is false, then either the graph contains a column index
3794  // which is invalid in the CURRENT column Map, or the graph is
3795  // locally indexed but currently has no column Map. In either
3796  // case, there is no way to convert the current local indices into
3797  // global indices, so that we can convert them into the new column
3798  // Map's local indices. It's possible for this to be true on some
3799  // processes but not others, due to replaceColMap.
3800  bool allCurColIndsValid = true;
3801  // On the calling process, are all valid current column indices
3802  // also in the new column Map on the calling process? In other
3803  // words, does local reindexing suffice, or should the user have
3804  // done an Import or Export instead?
3805  bool localSuffices = true;
3806 
3807  // Final arrays for the local indices. We will allocate exactly
3808  // one of these ONLY if the graph is locally indexed on the
3809  // calling process, and ONLY if the graph has one or more entries
3810  // (is not empty) on the calling process. In that case, we
3811  // allocate the first (1-D storage) if the graph has a static
3812  // profile, else we allocate the second (2-D storage).
3813  col_inds_type newLclInds1D;
3814  auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3815 
3816  // If indices aren't allocated, that means the calling process
3817  // owns no entries in the graph. Thus, there is nothing to
3818  // convert, and it trivially succeeds locally.
3819  if (indicesAreAllocated ()) {
3820  if (isLocallyIndexed ()) {
3821  if (hasColMap ()) { // locally indexed, and currently has a column Map
3822  const map_type& oldColMap = * (getColMap ());
3823  // Allocate storage for the new local indices.
3824  const size_t allocSize = this->getLocalAllocationSize ();
3825  newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3826  allocSize);
3827  // Attempt to convert the new indices locally.
3828  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3829  const RowInfo rowInfo = this->getRowInfo (lclRow);
3830  const size_t beg = rowInfo.offset1D;
3831  const size_t end = beg + rowInfo.numEntries;
3832  for (size_t k = beg; k < end; ++k) {
3833  const LO oldLclCol = oldLclInds1D(k);
3834  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3835  allCurColIndsValid = false;
3836  break; // Stop at the first invalid index
3837  }
3838  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3839 
3840  // The above conversion MUST succeed. Otherwise, the
3841  // current local index is invalid, which means that
3842  // the graph was constructed incorrectly.
3843  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3844  allCurColIndsValid = false;
3845  break; // Stop at the first invalid index
3846  }
3847  else {
3848  const LO newLclCol = newColMap->getLocalElement (gblCol);
3849  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3850  localSuffices = false;
3851  break; // Stop at the first invalid index
3852  }
3853  newLclInds1D(k) = newLclCol;
3854  }
3855  } // for each entry in the current row
3856  } // for each locally owned row
3857  }
3858  else { // locally indexed, but no column Map
3859  // This case is only possible if replaceColMap() was called
3860  // with a null argument on the calling process. It's
3861  // possible, but it means that this method can't possibly
3862  // succeed, since we have no way of knowing how to convert
3863  // the current local indices to global indices.
3864  allCurColIndsValid = false;
3865  }
3866  }
3867  else { // globally indexed
3868  // If the graph is globally indexed, we don't need to save
3869  // local indices, but we _do_ need to know whether the current
3870  // global indices are valid in the new column Map. We may
3871  // need to do a getRemoteIndexList call to find this out.
3872  //
3873  // In this case, it doesn't matter whether the graph currently
3874  // has a column Map. We don't need the old column Map to
3875  // convert from global indices to the _new_ column Map's local
3876  // indices. Furthermore, we can use the same code, whether
3877  // the graph is static or dynamic profile.
3878 
3879  // Test whether the current global indices are in the new
3880  // column Map on the calling process.
3881  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3882  const RowInfo rowInfo = this->getRowInfo (lclRow);
3883  auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3884  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3885  const GO gblCol = oldGblRowView(k);
3886  if (! newColMap->isNodeGlobalElement (gblCol)) {
3887  localSuffices = false;
3888  break; // Stop at the first invalid index
3889  }
3890  } // for each entry in the current row
3891  } // for each locally owned row
3892  } // locally or globally indexed
3893  } // whether indices are allocated
3894 
3895  // Do an all-reduce to check both possible error conditions.
3896  int lclSuccess[2];
3897  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3898  lclSuccess[1] = localSuffices ? 1 : 0;
3899  int gblSuccess[2];
3900  gblSuccess[0] = 0;
3901  gblSuccess[1] = 0;
3902  RCP<const Teuchos::Comm<int> > comm =
3903  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3904  if (! comm.is_null ()) {
3905  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3906  }
3907 
3908  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3909  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3910  " The most likely reason is that the graph is locally indexed, but the "
3911  "column Map is missing (null) on some processes, due to a previous call "
3912  "to replaceColMap().");
3913 
3914  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3915  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3916  "contains column indices that are in the old column Map, but not in the "
3917  "new column Map (on that process). This method does NOT redistribute "
3918  "data; it does not claim to do the work of an Import or Export operation."
3919  " This means that for all processess, the calling process MUST own all "
3920  "column indices, in both the old column Map and the new column Map. In "
3921  "this case, you will need to do an Import or Export operation to "
3922  "redistribute data.");
3923 
3924  // Commit the results.
3925  if (isLocallyIndexed ()) {
3926  { // scope the device view; sortAndMergeAllIndices needs host
3927  typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3928  Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3929  Kokkos::WithoutInitializing),
3930  newLclInds1D.extent(0));
3931  Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3932  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3933  }
3934 
3935  // We've reindexed, so we don't know if the indices are sorted.
3936  //
3937  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3938  // since we're already going through all the indices above. We
3939  // could also sort each row in place; that way, we would only
3940  // have to make one pass over the rows.
3941  indicesAreSorted_ = false;
3942  if (sortIndicesInEachRow) {
3943  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3944  // order to call this method.
3945  //
3946  // FIXME (mfh 17 Sep 2014) This violates the strong exception
3947  // guarantee. It would be better to sort the new index arrays
3948  // before committing them.
3949  const bool sorted = false; // need to resort
3950  const bool merged = true; // no need to merge, since no dups
3951  this->sortAndMergeAllIndices (sorted, merged);
3952  }
3953  }
3954  colMap_ = newColMap;
3955 
3956  if (newImport.is_null ()) {
3957  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3958  // check whether the input Import is null on any process.
3959  //
3960  // If the domain Map hasn't been set yet, we can't compute a new
3961  // Import object. Leave it what it is; it should be null, but
3962  // it doesn't matter. If the domain Map _has_ been set, then
3963  // compute a new Import object if necessary.
3964  if (! domainMap_.is_null ()) {
3965  if (! domainMap_->isSameAs (* newColMap)) {
3966  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
3967  } else {
3968  importer_ = Teuchos::null; // don't need an Import
3969  }
3970  }
3971  } else {
3972  // The caller gave us an Import object. Assume that it's valid.
3973  importer_ = newImport;
3974  }
3975  }
3976 
3977  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3978  void
3980  replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
3981  {
3982  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3983  TEUCHOS_TEST_FOR_EXCEPTION(
3984  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
3985  "this method unless the graph already has a column Map.");
3986  TEUCHOS_TEST_FOR_EXCEPTION(
3987  newDomainMap.is_null (), std::invalid_argument,
3988  prefix << "The new domain Map must be nonnull.");
3989 
3990  // Create a new importer, if needed
3991  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3992  if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
3993  newImporter = rcp(new import_type(newDomainMap, colMap_));
3994  }
3995  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3996  }
3997 
3998  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3999  void
4001  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4002  const Teuchos::RCP<const import_type>& newImporter)
4003  {
4004  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4005  TEUCHOS_TEST_FOR_EXCEPTION(
4006  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4007  "this method unless the graph already has a column Map.");
4008  TEUCHOS_TEST_FOR_EXCEPTION(
4009  newDomainMap.is_null (), std::invalid_argument,
4010  prefix << "The new domain Map must be nonnull.");
4011 
4012  if (debug_) {
4013  if (newImporter.is_null ()) {
4014  // It's not a good idea to put expensive operations in a macro
4015  // clause, even if they are side effect - free, because macros
4016  // don't promise that they won't evaluate their arguments more
4017  // than once. It's polite for them to do so, but not required.
4018  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4019  TEUCHOS_TEST_FOR_EXCEPTION
4020  (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4021  "then the new domain Map must be the same as the current column Map.");
4022  }
4023  else {
4024  const bool colSameAsTgt =
4025  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4026  const bool newDomSameAsSrc =
4027  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4028  TEUCHOS_TEST_FOR_EXCEPTION
4029  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4030  "new Import is nonnull, then the current column Map must be the same "
4031  "as the new Import's target Map, and the new domain Map must be the "
4032  "same as the new Import's source Map.");
4033  }
4034  }
4035 
4036  domainMap_ = newDomainMap;
4037  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4038  }
4039 
4040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4041  void
4043  replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4044  {
4045  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4046  TEUCHOS_TEST_FOR_EXCEPTION(
4047  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4048  "this method unless the graph already has a row Map.");
4049  TEUCHOS_TEST_FOR_EXCEPTION(
4050  newRangeMap.is_null (), std::invalid_argument,
4051  prefix << "The new range Map must be nonnull.");
4052 
4053  // Create a new exporter, if needed
4054  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4055  if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4056  newExporter = rcp(new export_type(rowMap_, newRangeMap));
4057  }
4058  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4059  }
4060 
4061  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4062  void
4064  replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4065  const Teuchos::RCP<const export_type>& newExporter)
4066  {
4067  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4068  TEUCHOS_TEST_FOR_EXCEPTION(
4069  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4070  "this method unless the graph already has a column Map.");
4071  TEUCHOS_TEST_FOR_EXCEPTION(
4072  newRangeMap.is_null (), std::invalid_argument,
4073  prefix << "The new domain Map must be nonnull.");
4074 
4075  if (debug_) {
4076  if (newExporter.is_null ()) {
4077  // It's not a good idea to put expensive operations in a macro
4078  // clause, even if they are side effect - free, because macros
4079  // don't promise that they won't evaluate their arguments more
4080  // than once. It's polite for them to do so, but not required.
4081  const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4082  TEUCHOS_TEST_FOR_EXCEPTION
4083  (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4084  "then the new range Map must be the same as the current row Map.");
4085  }
4086  else {
4087  const bool newRangeSameAsTgt =
4088  newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4089  const bool rowSameAsSrc =
4090  rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4091  TEUCHOS_TEST_FOR_EXCEPTION
4092  (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4093  "new Export is nonnull, then the current row Map must be the same "
4094  "as the new Export's source Map, and the new range Map must be the "
4095  "same as the new Export's target Map.");
4096  }
4097  }
4098 
4099  rangeMap_ = newRangeMap;
4100  exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4101  }
4102 
4103 
4104  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4108  {
4109  return local_graph_device_type(
4110  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4111  rowPtrsPacked_dev_);
4112  }
4113 
4114  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4117  getLocalGraphHost () const
4118  {
4119  return local_graph_host_type(
4120  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4121  rowPtrsPacked_host_);
4122  }
4123 
4124  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4125  void
4128  {
4129  using ::Tpetra::Details::ProfilingRegion;
4130  using Teuchos::ArrayView;
4131  using Teuchos::outArg;
4132  using Teuchos::reduceAll;
4133  typedef global_size_t GST;
4134 
4135  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4136 
4137  this->computeLocalConstants ();
4138 
4139  // Compute global constants from local constants. Processes that
4140  // already have local constants still participate in the
4141  // all-reduces, using their previously computed values.
4142  if (! this->haveGlobalConstants_) {
4143  const Teuchos::Comm<int>& comm = * (this->getComm ());
4144  // Promote all the nodeNum* and nodeMaxNum* quantities from
4145  // size_t to global_size_t, when doing the all-reduces for
4146  // globalNum* / globalMaxNum* results.
4147  //
4148  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4149  // this in two all-reduces (one for the sum and the other for
4150  // the max), or use a custom MPI_Op that combines the sum and
4151  // the max. The latter might even be slower than two
4152  // all-reduces on modern network hardware. It would also be a
4153  // good idea to use nonblocking all-reduces (MPI 3), so that we
4154  // don't have to wait around for the first one to finish before
4155  // starting the second one.
4156  GST lcl, gbl;
4157  lcl = static_cast<GST> (this->getLocalNumEntries ());
4158 
4159  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4160  this->globalNumEntries_ = gbl;
4161 
4162  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4163  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4164  outArg (this->globalMaxNumRowEntries_));
4165  this->haveGlobalConstants_ = true;
4166  }
4167  }
4168 
4169 
4170  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4171  void
4174  {
4175  using ::Tpetra::Details::ProfilingRegion;
4176 
4177  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4178  if (this->haveLocalConstants_) {
4179  return;
4180  }
4181 
4182  // Reset local properties
4183  this->nodeMaxNumRowEntries_ =
4184  Teuchos::OrdinalTraits<size_t>::invalid();
4185 
4186  using LO = local_ordinal_type;
4187 
4188  auto ptr = this->rowPtrsPacked_dev_;
4189  const LO lclNumRows = ptr.extent(0) == 0 ?
4190  static_cast<LO> (0) :
4191  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4192 
4193  const LO lclMaxNumRowEnt =
4194  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4195  ptr, lclNumRows);
4196  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4197  this->haveLocalConstants_ = true;
4198  }
4199 
4200 
4201  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4202  std::pair<size_t, std::string>
4204  makeIndicesLocal (const bool verbose)
4205  {
4207  using Teuchos::arcp;
4208  using Teuchos::Array;
4209  using std::endl;
4210  typedef LocalOrdinal LO;
4211  typedef GlobalOrdinal GO;
4212  typedef device_type DT;
4213  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4214  typedef decltype (k_numRowEntries_) row_entries_type;
4215  typedef typename row_entries_type::non_const_value_type num_ent_type;
4216  const char tfecfFuncName[] = "makeIndicesLocal: ";
4217  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4218 
4219  std::unique_ptr<std::string> prefix;
4220  if (verbose) {
4221  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4222  std::ostringstream os;
4223  os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4224  std::cerr << os.str();
4225  }
4226 
4227  // These are somewhat global properties, so it's safe to have
4228  // exception checks for them, rather than returning an error code.
4229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4230  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4231  "column Map yet. This method should never be called in that case. "
4232  "Please report this bug to the Tpetra developers.");
4233  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4234  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4235  "that it has a column Map, because hasColMap() returns true. However, "
4236  "the result of getColMap() is null. This should never happen. Please "
4237  "report this bug to the Tpetra developers.");
4238 
4239  // Return value 1: The number of column indices (counting
4240  // duplicates) that could not be converted to local indices,
4241  // because they were not in the column Map on the calling process.
4242  size_t lclNumErrs = 0;
4243  std::ostringstream errStrm; // for return value 2 (error string)
4244 
4245  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4246  const map_type& colMap = * (this->getColMap ());
4247 
4248  if (this->isGloballyIndexed () && lclNumRows != 0) {
4249  // This is a host-accessible View.
4250  typename row_entries_type::const_type h_numRowEnt =
4251  this->k_numRowEntries_;
4252 
4253  // Allocate space for local indices.
4254  if (rowPtrsUnpacked_host_.extent (0) == 0) {
4255  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4256  "happen here. Please report this bug to the Tpetra developers."
4257  << endl;
4258  // Need to return early.
4259  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4260  errStrm.str ());
4261  }
4262  const auto numEnt = rowPtrsUnpacked_host_(lclNumRows);
4263 
4264  // mfh 17 Dec 2016: We don't need initial zero-fill of
4265  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4266  // AllowPadding would only help for aligned access (e.g.,
4267  // for vectorization) if we also were to pad each row to the
4268  // same alignment, so we'll skip AllowPadding for now.
4269 
4270  // using Kokkos::AllowPadding;
4271  using Kokkos::view_alloc;
4272  using Kokkos::WithoutInitializing;
4273 
4274  // When giving the label as an argument to
4275  // Kokkos::view_alloc, the label must be a string and not a
4276  // char*, else the code won't compile. This is because
4277  // view_alloc also allows a raw pointer as its first
4278  // argument. See
4279  // https://github.com/kokkos/kokkos/issues/434. This is a
4280  // large allocation typically, so the overhead of creating
4281  // an std::string is minor.
4282  const std::string label ("Tpetra::CrsGraph::lclInd");
4283  if (verbose) {
4284  std::ostringstream os;
4285  os << *prefix << "(Re)allocate lclInd_wdv: old="
4286  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4287  std::cerr << os.str();
4288  }
4289 
4290  local_inds_dualv_type lclInds_dualv =
4291  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4292  numEnt);
4293  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4294 
4295  auto lclColMap = colMap.getLocalMap ();
4296  // This is a "device mirror" of the host View h_numRowEnt.
4297  //
4298  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4299  // Device instance is to use its default constructor. See the
4300  // following Kokkos issue:
4301  //
4302  // https://github.com/kokkos/kokkos/issues/442
4303  if (verbose) {
4304  std::ostringstream os;
4305  os << *prefix << "Allocate device mirror k_numRowEnt: "
4306  << h_numRowEnt.extent(0) << endl;
4307  std::cerr << os.str();
4308  }
4309  auto k_numRowEnt =
4310  Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4311 
4313  lclNumErrs =
4314  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4315  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4316  gblInds_wdv.getDeviceView(Access::ReadOnly),
4317  rowPtrsUnpacked_dev_,
4318  lclColMap,
4319  k_numRowEnt);
4320  if (lclNumErrs != 0) {
4321  const int myRank = [this] () {
4322  auto map = this->getMap ();
4323  if (map.is_null ()) {
4324  return 0;
4325  }
4326  else {
4327  auto comm = map->getComm ();
4328  return comm.is_null () ? 0 : comm->getRank ();
4329  }
4330  } ();
4331  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4332  errStrm << "(Process " << myRank << ") When converting column "
4333  "indices from global to local, we encountered " << lclNumErrs
4334  << " ind" << (pluralNumErrs ? "ices" : "ex")
4335  << " that do" << (pluralNumErrs ? "es" : "")
4336  << " not live in the column Map on this process." << endl;
4337  }
4338 
4339  // We've converted column indices from global to local, so we
4340  // can deallocate the global column indices (which we know are
4341  // in 1-D storage, because the graph has static profile).
4342  if (verbose) {
4343  std::ostringstream os;
4344  os << *prefix << "Free gblInds_wdv: "
4345  << gblInds_wdv.extent(0) << endl;
4346  std::cerr << os.str();
4347  }
4348  gblInds_wdv = global_inds_wdv_type ();
4349  } // globallyIndexed() && lclNumRows > 0
4350 
4351  this->indicesAreLocal_ = true;
4352  this->indicesAreGlobal_ = false;
4353  this->checkInternalState ();
4354 
4355  return std::make_pair (lclNumErrs, errStrm.str ());
4356  }
4357 
4358  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4359  void
4361  makeColMap (Teuchos::Array<int>& remotePIDs)
4362  {
4364  using std::endl;
4365  const char tfecfFuncName[] = "makeColMap";
4366 
4367  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4368  std::unique_ptr<std::string> prefix;
4369  if (verbose_) {
4370  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4371  std::ostringstream os;
4372  os << *prefix << "Start" << endl;
4373  std::cerr << os.str();
4374  }
4375 
4376  // this->colMap_ should be null at this point, but we accept the
4377  // future possibility that it might not be (esp. if we decide
4378  // later to support graph structure changes after first
4379  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4380  // 2017) support).
4381  Teuchos::RCP<const map_type> colMap = this->colMap_;
4382  const bool sortEachProcsGids =
4383  this->sortGhostsAssociatedWithEachProcessor_;
4384 
4385  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4386  // per-process error code. If an error does occur on a process,
4387  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4388  // notice that error. This is the caller's responsibility. For
4389  // now, we only propagate (to all processes) and report the error
4390  // in debug mode. In the future, we need to add the local/global
4391  // error handling scheme used in BlockCrsMatrix to this class.
4392  if (debug_) {
4393  using Teuchos::outArg;
4394  using Teuchos::REDUCE_MIN;
4395  using Teuchos::reduceAll;
4396 
4397  std::ostringstream errStrm;
4398  const int lclErrCode =
4399  Details::makeColMap (colMap, remotePIDs,
4400  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4401  auto comm = this->getComm ();
4402  if (! comm.is_null ()) {
4403  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4404  int gblSuccess = 0; // output argument
4405  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4406  outArg (gblSuccess));
4407  if (gblSuccess != 1) {
4408  std::ostringstream os;
4409  Details::gathervPrint (os, errStrm.str (), *comm);
4410  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4411  (true, std::runtime_error, ": An error happened on at "
4412  "least one process in the CrsGraph's communicator. "
4413  "Here are all processes' error messages:" << std::endl
4414  << os.str ());
4415  }
4416  }
4417  }
4418  else {
4419  (void) Details::makeColMap (colMap, remotePIDs,
4420  getDomainMap (), *this, sortEachProcsGids, nullptr);
4421  }
4422  // See above. We want to admit the possibility of makeColMap
4423  // actually revising an existing column Map, even though that
4424  // doesn't currently (as of 10 May 2017) happen.
4425  this->colMap_ = colMap;
4426 
4427  checkInternalState ();
4428  if (verbose_) {
4429  std::ostringstream os;
4430  os << *prefix << "Done" << endl;
4431  std::cerr << os.str();
4432  }
4433  }
4434 
4435 
4436  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4437  void
4439  sortAndMergeAllIndices (const bool sorted, const bool merged)
4440  {
4441  using std::endl;
4442  using LO = LocalOrdinal;
4443  using host_execution_space =
4444  typename Kokkos::View<LO*, device_type>::HostMirror::
4445  execution_space;
4446  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4447  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4448  Details::ProfilingRegion regionSortAndMerge
4449  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4450 
4451  std::unique_ptr<std::string> prefix;
4452  if (verbose_) {
4453  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4454  std::ostringstream os;
4455  os << *prefix << "Start: "
4456  << "sorted=" << (sorted ? "true" : "false")
4457  << ", merged=" << (merged ? "true" : "false") << endl;
4458  std::cerr << os.str();
4459  }
4460  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4461  (this->isGloballyIndexed(), std::logic_error,
4462  "This method may only be called after makeIndicesLocal." );
4463  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4464  (! merged && this->isStorageOptimized(), std::logic_error,
4465  "The graph is already storage optimized, so we shouldn't be "
4466  "merging any indices. "
4467  "Please report this bug to the Tpetra developers.");
4468 
4469  if (! sorted || ! merged) {
4470  const LO lclNumRows(this->getLocalNumRows());
4471  auto range = range_type(0, lclNumRows);
4472 
4473  if (verbose_) {
4474  size_t totalNumDups = 0;
4475  Kokkos::parallel_reduce(range,
4476  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4477  {
4478  const RowInfo rowInfo = this->getRowInfo(lclRow);
4479  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4480  },
4481  totalNumDups);
4482  std::ostringstream os;
4483  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4484  std::cerr << os.str();
4485  }
4486  else {
4487  Kokkos::parallel_for(range,
4488  [this, sorted, merged] (const LO lclRow)
4489  {
4490  const RowInfo rowInfo = this->getRowInfo(lclRow);
4491  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4492  });
4493  }
4494  this->indicesAreSorted_ = true; // we just sorted every row
4495  this->noRedundancies_ = true; // we just merged every row
4496  }
4497 
4498  if (verbose_) {
4499  std::ostringstream os;
4500  os << *prefix << "Done" << endl;
4501  std::cerr << os.str();
4502  }
4503  }
4504 
4505  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4506  void
4508  makeImportExport (Teuchos::Array<int>& remotePIDs,
4509  const bool useRemotePIDs)
4510  {
4511  using ::Tpetra::Details::ProfilingRegion;
4512  using Teuchos::ParameterList;
4513  using Teuchos::RCP;
4514  using Teuchos::rcp;
4515  const char tfecfFuncName[] = "makeImportExport: ";
4516  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4517 
4518  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4519  (! this->hasColMap (), std::logic_error,
4520  "This method may not be called unless the graph has a column Map.");
4521  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4522 
4523  // Don't do any checks to see if we need to create the Import, if
4524  // it exists already.
4525  //
4526  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4527  // change CrsGraph in the future to allow changing the column
4528  // Map after fillComplete. For now, the column Map is fixed
4529  // after the first fillComplete call.
4530  if (importer_.is_null ()) {
4531  // Create the Import instance if necessary.
4532  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4533  if (params.is_null () || ! params->isSublist ("Import")) {
4534  if (useRemotePIDs) {
4535  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4536  }
4537  else {
4538  importer_ = rcp (new import_type (domainMap_, colMap_));
4539  }
4540  }
4541  else {
4542  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4543  if (useRemotePIDs) {
4544  RCP<import_type> newImp =
4545  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4546  importSublist));
4547  importer_ = newImp;
4548  }
4549  else {
4550  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4551  }
4552  }
4553  }
4554  }
4555 
4556  // Don't do any checks to see if we need to create the Export, if
4557  // it exists already.
4558  if (exporter_.is_null ()) {
4559  // Create the Export instance if necessary.
4560  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4561  if (params.is_null () || ! params->isSublist ("Export")) {
4562  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4563  }
4564  else {
4565  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4566  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4567  }
4568  }
4569  }
4570  }
4571 
4572 
4573  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4574  std::string
4577  {
4578  std::ostringstream oss;
4579  oss << dist_object_type::description ();
4580  if (isFillComplete ()) {
4581  oss << "{status = fill complete"
4582  << ", global rows = " << getGlobalNumRows()
4583  << ", global cols = " << getGlobalNumCols()
4584  << ", global num entries = " << getGlobalNumEntries()
4585  << "}";
4586  }
4587  else {
4588  oss << "{status = fill not complete"
4589  << ", global rows = " << getGlobalNumRows()
4590  << "}";
4591  }
4592  return oss.str();
4593  }
4594 
4595 
4596  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4597  void
4599  describe (Teuchos::FancyOStream &out,
4600  const Teuchos::EVerbosityLevel verbLevel) const
4601  {
4602  using Teuchos::ArrayView;
4603  using Teuchos::Comm;
4604  using Teuchos::RCP;
4605  using Teuchos::VERB_DEFAULT;
4606  using Teuchos::VERB_NONE;
4607  using Teuchos::VERB_LOW;
4608  using Teuchos::VERB_MEDIUM;
4609  using Teuchos::VERB_HIGH;
4610  using Teuchos::VERB_EXTREME;
4611  using std::endl;
4612  using std::setw;
4613 
4614  Teuchos::EVerbosityLevel vl = verbLevel;
4615  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4616  RCP<const Comm<int> > comm = this->getComm();
4617  const int myImageID = comm->getRank(),
4618  numImages = comm->getSize();
4619  size_t width = 1;
4620  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4621  ++width;
4622  }
4623  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4624  Teuchos::OSTab tab (out);
4625  // none: print nothing
4626  // low: print O(1) info from node 0
4627  // medium: print O(P) info, num entries per node
4628  // high: print O(N) info, num entries per row
4629  // extreme: print O(NNZ) info: print graph indices
4630  //
4631  // for medium and higher, print constituent objects at specified verbLevel
4632  if (vl != VERB_NONE) {
4633  if (myImageID == 0) out << this->description() << std::endl;
4634  // O(1) globals, minus what was already printed by description()
4635  if (isFillComplete() && myImageID == 0) {
4636  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4637  }
4638  // constituent objects
4639  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4640  if (myImageID == 0) out << "\nRow map: " << std::endl;
4641  rowMap_->describe(out,vl);
4642  if (colMap_ != Teuchos::null) {
4643  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4644  colMap_->describe(out,vl);
4645  }
4646  if (domainMap_ != Teuchos::null) {
4647  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4648  domainMap_->describe(out,vl);
4649  }
4650  if (rangeMap_ != Teuchos::null) {
4651  if (myImageID == 0) out << "\nRange map: " << std::endl;
4652  rangeMap_->describe(out,vl);
4653  }
4654  }
4655  // O(P) data
4656  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4657  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4658  if (myImageID == imageCtr) {
4659  out << "Node ID = " << imageCtr << std::endl
4660  << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4661  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4662  if (! indicesAreAllocated ()) {
4663  out << "Indices are not allocated." << std::endl;
4664  }
4665  }
4666  comm->barrier();
4667  comm->barrier();
4668  comm->barrier();
4669  }
4670  }
4671  // O(N) and O(NNZ) data
4672  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4673  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4674  if (myImageID == imageCtr) {
4675  out << std::setw(width) << "Node ID"
4676  << std::setw(width) << "Global Row"
4677  << std::setw(width) << "Num Entries";
4678  if (vl == VERB_EXTREME) {
4679  out << " Entries";
4680  }
4681  out << std::endl;
4682  const LocalOrdinal lclNumRows =
4683  static_cast<LocalOrdinal> (this->getLocalNumRows ());
4684  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4685  const RowInfo rowinfo = this->getRowInfo (r);
4686  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4687  out << std::setw(width) << myImageID
4688  << std::setw(width) << gid
4689  << std::setw(width) << rowinfo.numEntries;
4690  if (vl == VERB_EXTREME) {
4691  out << " ";
4692  if (isGloballyIndexed()) {
4693  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4694  for (size_t j=0; j < rowinfo.numEntries; ++j){
4695  GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4696  out << colgid << " ";
4697  }
4698  }
4699  else if (isLocallyIndexed()) {
4700  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4701  for (size_t j=0; j < rowinfo.numEntries; ++j) {
4702  LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4703  out << colMap_->getGlobalElement(collid) << " ";
4704  }
4705  }
4706  }
4707  out << std::endl;
4708  }
4709  }
4710  comm->barrier();
4711  comm->barrier();
4712  comm->barrier();
4713  }
4714  }
4715  }
4716  }
4717 
4718 
4719  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4720  bool
4722  checkSizes (const SrcDistObject& /* source */)
4723  {
4724  // It's not clear what kind of compatibility checks on sizes can
4725  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4726  // compatibility.
4727  return true;
4728  }
4729 
4730  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4731  void
4734  (const SrcDistObject& source,
4735  const size_t numSameIDs,
4736  const Kokkos::DualView<const local_ordinal_type*,
4737  buffer_device_type>& permuteToLIDs,
4738  const Kokkos::DualView<const local_ordinal_type*,
4739  buffer_device_type>& permuteFromLIDs,
4740  const CombineMode /*CM*/)
4741  {
4742  using std::endl;
4743  using LO = local_ordinal_type;
4744  using GO = global_ordinal_type;
4745  using this_CRS_type = CrsGraph<LO, GO, node_type>;
4746  const char tfecfFuncName[] = "copyAndPermute: ";
4747  const bool verbose = verbose_;
4748 
4749  std::unique_ptr<std::string> prefix;
4750  if (verbose) {
4751  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4752  std::ostringstream os;
4753  os << *prefix << endl;
4754  std::cerr << os.str ();
4755  }
4756 
4757  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4758  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4759  std::runtime_error, "permuteToLIDs.extent(0) = "
4760  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4761  << permuteFromLIDs.extent (0) << ".");
4762 
4763  // We know from checkSizes that the source object is a
4764  // row_graph_type, so we don't need to check again.
4765  const row_graph_type& srcRowGraph =
4766  dynamic_cast<const row_graph_type&> (source);
4767 
4768  if (verbose) {
4769  std::ostringstream os;
4770  os << *prefix << "Compute padding" << endl;
4771  std::cerr << os.str ();
4772  }
4773  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4774  permuteToLIDs, permuteFromLIDs, verbose);
4775  applyCrsPadding(*padding, verbose);
4776 
4777  // If the source object is actually a CrsGraph, we can use view
4778  // mode instead of copy mode to access the entries in each row,
4779  // if the graph is not fill complete.
4780  const this_CRS_type* srcCrsGraph =
4781  dynamic_cast<const this_CRS_type*> (&source);
4782 
4783  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4784  const map_type& tgtRowMap = *(getRowMap());
4785  const bool src_filled = srcRowGraph.isFillComplete();
4786  nonconst_global_inds_host_view_type row_copy;
4787  LO myid = 0;
4788 
4789  //
4790  // "Copy" part of "copy and permute."
4791  //
4792  if (src_filled || srcCrsGraph == nullptr) {
4793  if (verbose) {
4794  std::ostringstream os;
4795  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4796  std::cerr << os.str ();
4797  }
4798  // If the source graph is fill complete, we can't use view mode,
4799  // because the data might be stored in a different format not
4800  // compatible with the expectations of view mode. Also, if the
4801  // source graph is not a CrsGraph, we can't use view mode,
4802  // because RowGraph only provides copy mode access to the data.
4803  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4804  const GO gid = srcRowMap.getGlobalElement (myid);
4805  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4806  Kokkos::resize(row_copy,row_length);
4807  size_t check_row_length = 0;
4808  srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4809  this->insertGlobalIndices (gid, row_length, row_copy.data());
4810  }
4811  } else {
4812  if (verbose) {
4813  std::ostringstream os;
4814  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4815  std::cerr << os.str ();
4816  }
4817  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4818  const GO gid = srcRowMap.getGlobalElement (myid);
4819  global_inds_host_view_type row;
4820  srcCrsGraph->getGlobalRowView (gid, row);
4821  this->insertGlobalIndices (gid, row.extent(0), row.data());
4822  }
4823  }
4824 
4825  //
4826  // "Permute" part of "copy and permute."
4827  //
4828  auto permuteToLIDs_h = permuteToLIDs.view_host ();
4829  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4830 
4831  if (src_filled || srcCrsGraph == nullptr) {
4832  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4833  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4834  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4835  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4836  Kokkos::resize(row_copy,row_length);
4837  size_t check_row_length = 0;
4838  srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4839  this->insertGlobalIndices (mygid, row_length, row_copy.data());
4840  }
4841  } else {
4842  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4843  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4844  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4845  global_inds_host_view_type row;
4846  srcCrsGraph->getGlobalRowView (srcgid, row);
4847  this->insertGlobalIndices (mygid, row.extent(0), row.data());
4848  }
4849  }
4850 
4851  if (verbose) {
4852  std::ostringstream os;
4853  os << *prefix << "Done" << endl;
4854  std::cerr << os.str ();
4855  }
4856  }
4857 
4858  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4859  void
4860  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4861  applyCrsPadding(const padding_type& padding,
4862  const bool verbose)
4863  {
4864  using Details::ProfilingRegion;
4865  using Details::padCrsArrays;
4866  using std::endl;
4867  using LO = local_ordinal_type;
4868  using row_ptrs_type =
4869  typename local_graph_device_type::row_map_type::non_const_type;
4870  using range_policy =
4871  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4872  const char tfecfFuncName[] = "applyCrsPadding";
4873  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4874 
4875  std::unique_ptr<std::string> prefix;
4876  if (verbose) {
4877  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4878  std::ostringstream os;
4879  os << *prefix << "padding: ";
4880  padding.print(os);
4881  os << endl;
4882  std::cerr << os.str();
4883  }
4884  const int myRank = ! verbose ? -1 : [&] () {
4885  auto map = this->getMap();
4886  if (map.is_null()) {
4887  return -1;
4888  }
4889  auto comm = map->getComm();
4890  if (comm.is_null()) {
4891  return -1;
4892  }
4893  return comm->getRank();
4894  } ();
4895 
4896  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4897  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4898  // size needs to increase. That should be the job of
4899  // padCrsArrays.
4900 
4901  // Assume global indexing we don't have any indices yet
4902  if (! indicesAreAllocated()) {
4903  if (verbose) {
4904  std::ostringstream os;
4905  os << *prefix << "Call allocateIndices" << endl;
4906  std::cerr << os.str();
4907  }
4908  allocateIndices(GlobalIndices, verbose);
4909  }
4910  TEUCHOS_ASSERT( indicesAreAllocated() );
4911 
4912  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4913  // would use it directly.
4914 
4915  if (verbose) {
4916  std::ostringstream os;
4917  os << *prefix << "Allocate row_ptrs_beg: "
4918  << rowPtrsUnpacked_dev_.extent(0) << endl;
4919  std::cerr << os.str();
4920  }
4921  using Kokkos::view_alloc;
4922  using Kokkos::WithoutInitializing;
4923  row_ptrs_type row_ptrs_beg(
4924  view_alloc("row_ptrs_beg", WithoutInitializing),
4925  rowPtrsUnpacked_dev_.extent(0));
4926  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4927  Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev_);
4928 
4929  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4930  size_t(row_ptrs_beg.extent(0) - 1);
4931  if (verbose) {
4932  std::ostringstream os;
4933  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4934  std::cerr << os.str();
4935  }
4936  row_ptrs_type row_ptrs_end(
4937  view_alloc("row_ptrs_end", WithoutInitializing), N);
4938  row_ptrs_type num_row_entries;
4939 
4940  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4941 
4942  execution_space().fence(); // we need above deep_copy to be done
4943 
4944  if (refill_num_row_entries) { // Case 1: Unpacked storage
4945  // We can't assume correct *this capture until C++17, and it's
4946  // likely more efficient just to capture what we need anyway.
4947  num_row_entries =
4948  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4949  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4950  Kokkos::parallel_for
4951  ("Fill end row pointers", range_policy(0, N),
4952  KOKKOS_LAMBDA (const size_t i) {
4953  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4954  });
4955  }
4956  else {
4957  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4958  // storage, we don't need row_ptr_end to be separate allocation;
4959  // could just have it alias row_ptr_beg+1.
4960  Kokkos::parallel_for
4961  ("Fill end row pointers", range_policy(0, N),
4962  KOKKOS_LAMBDA (const size_t i) {
4963  row_ptrs_end(i) = row_ptrs_beg(i+1);
4964  });
4965  }
4966 
4967  if (isGloballyIndexed()) {
4968  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4969  padding, myRank, verbose);
4970  }
4971  else {
4972  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4973  padding, myRank, verbose);
4974  }
4975 
4976  if (refill_num_row_entries) {
4977  Kokkos::parallel_for
4978  ("Fill num entries", range_policy(0, N),
4979  KOKKOS_LAMBDA (const size_t i) {
4980  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4981  });
4982  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4983  }
4984  if (verbose) {
4985  std::ostringstream os;
4986  os << *prefix << "Reassign k_rowPtrs_; old size: "
4987  << rowPtrsUnpacked_dev_.extent(0) << ", new size: "
4988  << row_ptrs_beg.extent(0) << endl;
4989  std::cerr << os.str();
4990  TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
4991  }
4992 
4993  setRowPtrsUnpacked(row_ptrs_beg);
4994 
4995  set_need_sync_host_uvm_access(); // need fence before host UVM access of k_rowPtrs_
4996  }
4997 
4998  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4999  std::unique_ptr<
5000  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5001  >
5002  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5003  computeCrsPadding(
5004  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5005  const size_t numSameIDs,
5006  const Kokkos::DualView<const local_ordinal_type*,
5007  buffer_device_type>& permuteToLIDs,
5008  const Kokkos::DualView<const local_ordinal_type*,
5009  buffer_device_type>& permuteFromLIDs,
5010  const bool verbose) const
5011  {
5012  using LO = local_ordinal_type;
5013  using std::endl;
5014 
5015  std::unique_ptr<std::string> prefix;
5016  if (verbose) {
5017  prefix = this->createPrefix("CrsGraph",
5018  "computeCrsPadding(same & permute)");
5019  std::ostringstream os;
5020  os << *prefix << "{numSameIDs: " << numSameIDs
5021  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5022  << endl;
5023  std::cerr << os.str();
5024  }
5025 
5026  const int myRank = [&] () {
5027  auto comm = rowMap_.is_null() ? Teuchos::null :
5028  rowMap_->getComm();
5029  return comm.is_null() ? -1 : comm->getRank();
5030  } ();
5031  std::unique_ptr<padding_type> padding(
5032  new padding_type(myRank, numSameIDs,
5033  permuteFromLIDs.extent(0)));
5034 
5035  computeCrsPaddingForSameIDs(*padding, source,
5036  static_cast<LO>(numSameIDs));
5037  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5038  permuteFromLIDs);
5039  return padding;
5040  }
5041 
5042  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5043  void
5044  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5045  computeCrsPaddingForSameIDs(
5046  padding_type& padding,
5047  const RowGraph<local_ordinal_type, global_ordinal_type,
5048  node_type>& source,
5049  const local_ordinal_type numSameIDs) const
5050  {
5051  using LO = local_ordinal_type;
5052  using GO = global_ordinal_type;
5053  using Details::Impl::getRowGraphGlobalRow;
5054  using std::endl;
5055  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5056 
5057  std::unique_ptr<std::string> prefix;
5058  const bool verbose = verbose_;
5059  if (verbose) {
5060  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5061  std::ostringstream os;
5062  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5063  std::cerr << os.str();
5064  }
5065 
5066  if (numSameIDs == 0) {
5067  return;
5068  }
5069 
5070  const map_type& srcRowMap = *(source.getRowMap());
5071  const map_type& tgtRowMap = *rowMap_;
5072  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5073  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5074  const bool src_is_unique =
5075  srcCrs == nullptr ? false : srcCrs->isMerged();
5076  const bool tgt_is_unique = this->isMerged();
5077 
5078  std::vector<GO> srcGblColIndsScratch;
5079  std::vector<GO> tgtGblColIndsScratch;
5080 
5081  execute_sync_host_uvm_access(); // protect host UVM access
5082  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5083  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5084  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5085  auto srcGblColInds = getRowGraphGlobalRow(
5086  srcGblColIndsScratch, source, srcGblRowInd);
5087  auto tgtGblColInds = getRowGraphGlobalRow(
5088  tgtGblColIndsScratch, *this, tgtGblRowInd);
5089  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5090  tgtGblColInds.size(), tgt_is_unique,
5091  srcGblColInds.getRawPtr(),
5092  srcGblColInds.size(), src_is_unique);
5093  }
5094  if (verbose) {
5095  std::ostringstream os;
5096  os << *prefix << "Done" << endl;
5097  std::cerr << os.str();
5098  }
5099  }
5100 
5101  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5102  void
5103  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5104  computeCrsPaddingForPermutedIDs(
5105  padding_type& padding,
5106  const RowGraph<local_ordinal_type, global_ordinal_type,
5107  node_type>& source,
5108  const Kokkos::DualView<const local_ordinal_type*,
5109  buffer_device_type>& permuteToLIDs,
5110  const Kokkos::DualView<const local_ordinal_type*,
5111  buffer_device_type>& permuteFromLIDs) const
5112  {
5113  using LO = local_ordinal_type;
5114  using GO = global_ordinal_type;
5115  using Details::Impl::getRowGraphGlobalRow;
5116  using std::endl;
5117  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5118 
5119  std::unique_ptr<std::string> prefix;
5120  const bool verbose = verbose_;
5121  if (verbose) {
5122  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5123  std::ostringstream os;
5124  os << *prefix << "permuteToLIDs.extent(0): "
5125  << permuteToLIDs.extent(0)
5126  << ", permuteFromLIDs.extent(0): "
5127  << permuteFromLIDs.extent(0) << endl;
5128  std::cerr << os.str();
5129  }
5130 
5131  if (permuteToLIDs.extent(0) == 0) {
5132  return;
5133  }
5134 
5135  const map_type& srcRowMap = *(source.getRowMap());
5136  const map_type& tgtRowMap = *rowMap_;
5137  using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5138  const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5139  const bool src_is_unique =
5140  srcCrs == nullptr ? false : srcCrs->isMerged();
5141  const bool tgt_is_unique = this->isMerged();
5142 
5143  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5144  auto permuteToLIDs_h = permuteToLIDs.view_host();
5145  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5146  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5147 
5148  std::vector<GO> srcGblColIndsScratch;
5149  std::vector<GO> tgtGblColIndsScratch;
5150  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5151 
5152  execute_sync_host_uvm_access(); // protect host UVM access
5153  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5154  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5155  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5156  auto srcGblColInds = getRowGraphGlobalRow(
5157  srcGblColIndsScratch, source, srcGblRowInd);
5158  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5159  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5160  auto tgtGblColInds = getRowGraphGlobalRow(
5161  tgtGblColIndsScratch, *this, tgtGblRowInd);
5162  padding.update_permute(whichPermute, tgtLclRowInd,
5163  tgtGblColInds.getRawPtr(),
5164  tgtGblColInds.size(), tgt_is_unique,
5165  srcGblColInds.getRawPtr(),
5166  srcGblColInds.size(), src_is_unique);
5167  }
5168 
5169  if (verbose) {
5170  std::ostringstream os;
5171  os << *prefix << "Done" << endl;
5172  std::cerr << os.str();
5173  }
5174  }
5175 
5176  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5177  std::unique_ptr<
5178  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5179  >
5180  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5181  computeCrsPaddingForImports(
5182  const Kokkos::DualView<const local_ordinal_type*,
5183  buffer_device_type>& importLIDs,
5184  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5185  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5186  const bool verbose) const
5187  {
5188  using Details::Impl::getRowGraphGlobalRow;
5189  using std::endl;
5190  using LO = local_ordinal_type;
5191  using GO = global_ordinal_type;
5192  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5193 
5194  std::unique_ptr<std::string> prefix;
5195  if (verbose) {
5196  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5197  std::ostringstream os;
5198  os << *prefix << "importLIDs.extent(0): "
5199  << importLIDs.extent(0)
5200  << ", imports.extent(0): "
5201  << imports.extent(0)
5202  << ", numPacketsPerLID.extent(0): "
5203  << numPacketsPerLID.extent(0) << endl;
5204  std::cerr << os.str();
5205  }
5206 
5207  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5208  const int myRank = [&] () {
5209  auto comm = rowMap_.is_null() ? Teuchos::null :
5210  rowMap_->getComm();
5211  return comm.is_null() ? -1 : comm->getRank();
5212  } ();
5213  std::unique_ptr<padding_type> padding(
5214  new padding_type(myRank, numImports));
5215 
5216  if (imports.need_sync_host()) {
5217  imports.sync_host();
5218  }
5219  auto imports_h = imports.view_host();
5220  if (numPacketsPerLID.need_sync_host ()) {
5221  numPacketsPerLID.sync_host();
5222  }
5223  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5224 
5225  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5226  auto importLIDs_h = importLIDs.view_host();
5227 
5228  const map_type& tgtRowMap = *rowMap_;
5229  // Always merge source column indices, since isMerged() is
5230  // per-process state, and we don't know its value on other
5231  // processes that sent us data.
5232  constexpr bool src_is_unique = false;
5233  const bool tgt_is_unique = isMerged();
5234 
5235  std::vector<GO> tgtGblColIndsScratch;
5236  size_t offset = 0;
5237  execute_sync_host_uvm_access(); // protect host UVM access
5238  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5239  // CrsGraph packs just global column indices, while CrsMatrix
5240  // packs bytes (first the number of entries in the row, then the
5241  // global column indices, then other stuff like the matrix
5242  // values in that row).
5243  const LO origSrcNumEnt =
5244  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5245  GO* const srcGblColInds = imports_h.data() + offset;
5246 
5247  const LO tgtLclRowInd = importLIDs_h[whichImport];
5248  const GO tgtGblRowInd =
5249  tgtRowMap.getGlobalElement(tgtLclRowInd);
5250  auto tgtGblColInds = getRowGraphGlobalRow(
5251  tgtGblColIndsScratch, *this, tgtGblRowInd);
5252  const size_t origTgtNumEnt(tgtGblColInds.size());
5253 
5254  padding->update_import(whichImport, tgtLclRowInd,
5255  tgtGblColInds.getRawPtr(),
5256  origTgtNumEnt, tgt_is_unique,
5257  srcGblColInds,
5258  origSrcNumEnt, src_is_unique);
5259  offset += origSrcNumEnt;
5260  }
5261 
5262  if (verbose) {
5263  std::ostringstream os;
5264  os << *prefix << "Done" << endl;
5265  std::cerr << os.str();
5266  }
5267  return padding;
5268  }
5269 
5270  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5271  std::unique_ptr<
5272  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5273  >
5274  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5275  computePaddingForCrsMatrixUnpack(
5276  const Kokkos::DualView<const local_ordinal_type*,
5277  buffer_device_type>& importLIDs,
5278  Kokkos::DualView<char*, buffer_device_type> imports,
5279  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5280  const bool verbose) const
5281  {
5282  using Details::Impl::getRowGraphGlobalRow;
5283  using Details::PackTraits;
5284  using std::endl;
5285  using LO = local_ordinal_type;
5286  using GO = global_ordinal_type;
5287  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5288 
5289  std::unique_ptr<std::string> prefix;
5290  if (verbose) {
5291  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5292  std::ostringstream os;
5293  os << *prefix << "importLIDs.extent(0): "
5294  << importLIDs.extent(0)
5295  << ", imports.extent(0): "
5296  << imports.extent(0)
5297  << ", numPacketsPerLID.extent(0): "
5298  << numPacketsPerLID.extent(0) << endl;
5299  std::cerr << os.str();
5300  }
5301  const bool extraVerbose =
5302  verbose && Details::Behavior::verbose("CrsPadding");
5303 
5304  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5305  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5306  const int myRank = [&] () {
5307  auto comm = rowMap_.is_null() ? Teuchos::null :
5308  rowMap_->getComm();
5309  return comm.is_null() ? -1 : comm->getRank();
5310  } ();
5311  std::unique_ptr<padding_type> padding(
5312  new padding_type(myRank, numImports));
5313 
5314  if (imports.need_sync_host()) {
5315  imports.sync_host();
5316  }
5317  auto imports_h = imports.view_host();
5318  if (numPacketsPerLID.need_sync_host ()) {
5319  numPacketsPerLID.sync_host();
5320  }
5321  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5322 
5323  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5324  auto importLIDs_h = importLIDs.view_host();
5325 
5326  const map_type& tgtRowMap = *rowMap_;
5327  // Always merge source column indices, since isMerged() is
5328  // per-process state, and we don't know its value on other
5329  // processes that sent us data.
5330  constexpr bool src_is_unique = false;
5331  const bool tgt_is_unique = isMerged();
5332 
5333  std::vector<GO> srcGblColIndsScratch;
5334  std::vector<GO> tgtGblColIndsScratch;
5335  size_t offset = 0;
5336  execute_sync_host_uvm_access(); // protect host UVM access
5337  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5338  // CrsGraph packs just global column indices, while CrsMatrix
5339  // packs bytes (first the number of entries in the row, then the
5340  // global column indices, then other stuff like the matrix
5341  // values in that row).
5342  const size_t numBytes = numPacketsPerLID_h[whichImport];
5343  if (extraVerbose) {
5344  std::ostringstream os;
5345  os << *prefix << "whichImport=" << whichImport
5346  << ", numImports=" << numImports
5347  << ", numBytes=" << numBytes << endl;
5348  std::cerr << os.str();
5349  }
5350  if (numBytes == 0) {
5351  continue; // special case: no entries to unpack for this row
5352  }
5353  LO origSrcNumEnt = 0;
5354  const size_t numEntBeg = offset;
5355  const size_t numEntLen =
5356  PackTraits<LO>::packValueCount(origSrcNumEnt);
5357  TEUCHOS_ASSERT( numBytes >= numEntLen );
5358  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5359  PackTraits<LO>::unpackValue(origSrcNumEnt,
5360  imports_h.data() + numEntBeg);
5361  if (extraVerbose) {
5362  std::ostringstream os;
5363  os << *prefix << "whichImport=" << whichImport
5364  << ", numImports=" << numImports
5365  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5366  std::cerr << os.str();
5367  }
5368  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5369  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5370  const size_t gidsBeg = numEntBeg + numEntLen;
5371  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5372  srcGblColIndsScratch.resize(origSrcNumEnt);
5373  }
5374  GO* const srcGblColInds = srcGblColIndsScratch.data();
5375  PackTraits<GO>::unpackArray(srcGblColInds,
5376  imports_h.data() + gidsBeg,
5377  origSrcNumEnt);
5378  const LO tgtLclRowInd = importLIDs_h[whichImport];
5379  const GO tgtGblRowInd =
5380  tgtRowMap.getGlobalElement(tgtLclRowInd);
5381  auto tgtGblColInds = getRowGraphGlobalRow(
5382  tgtGblColIndsScratch, *this, tgtGblRowInd);
5383  const size_t origNumTgtEnt(tgtGblColInds.size());
5384 
5385  if (extraVerbose) {
5386  std::ostringstream os;
5387  os << *prefix << "whichImport=" << whichImport
5388  << ", numImports=" << numImports
5389  << ": Call padding->update_import" << endl;
5390  std::cerr << os.str();
5391  }
5392  padding->update_import(whichImport, tgtLclRowInd,
5393  tgtGblColInds.getRawPtr(),
5394  origNumTgtEnt, tgt_is_unique,
5395  srcGblColInds,
5396  origSrcNumEnt, src_is_unique);
5397  offset += numBytes;
5398  }
5399 
5400  if (verbose) {
5401  std::ostringstream os;
5402  os << *prefix << "Done" << endl;
5403  std::cerr << os.str();
5404  }
5405  return padding;
5406  }
5407 
5408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5409  void
5410  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5411  packAndPrepare
5412  (const SrcDistObject& source,
5413  const Kokkos::DualView<const local_ordinal_type*,
5414  buffer_device_type>& exportLIDs,
5415  Kokkos::DualView<packet_type*,
5416  buffer_device_type>& exports,
5417  Kokkos::DualView<size_t*,
5418  buffer_device_type> numPacketsPerLID,
5419  size_t& constantNumPackets)
5420  {
5422  using GO = global_ordinal_type;
5423  using std::endl;
5424  using crs_graph_type =
5425  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5426  const char tfecfFuncName[] = "packAndPrepare: ";
5427  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5428 
5429  const bool verbose = verbose_;
5430  std::unique_ptr<std::string> prefix;
5431  if (verbose) {
5432  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5433  std::ostringstream os;
5434  os << *prefix << "Start" << endl;
5435  std::cerr << os.str();
5436  }
5437 
5438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5439  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5440  std::runtime_error,
5441  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5442  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5443  << ".");
5444  const row_graph_type* srcRowGraphPtr =
5445  dynamic_cast<const row_graph_type*> (&source);
5446  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5447  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5448  "or Import operation to a CrsGraph must be a RowGraph with the same "
5449  "template parameters.");
5450  // We don't check whether src_graph has had fillComplete called,
5451  // because it doesn't matter whether the *source* graph has been
5452  // fillComplete'd. The target graph can not be fillComplete'd yet.
5453  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5454  (this->isFillComplete (), std::runtime_error,
5455  "The target graph of an Import or Export must not be fill complete.");
5456 
5457  const crs_graph_type* srcCrsGraphPtr =
5458  dynamic_cast<const crs_graph_type*> (&source);
5459 
5460  if (srcCrsGraphPtr == nullptr) {
5461  using Teuchos::ArrayView;
5462  using LO = local_ordinal_type;
5463 
5464  if (verbose) {
5465  std::ostringstream os;
5466  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5467  << endl;
5468  std::cerr << os.str();
5469  }
5470  // RowGraph::pack serves the "old" DistObject interface. It
5471  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5472  // entails deep-copying the exports buffer on output. RowGraph
5473  // is a convenience interface when not a CrsGraph, so we accept
5474  // the performance hit.
5475  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5476  auto exportLIDs_h = exportLIDs.view_host ();
5477  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5478  exportLIDs_h.extent (0));
5479  Teuchos::Array<GO> exports_a;
5480 
5481  numPacketsPerLID.clear_sync_state ();
5482  numPacketsPerLID.modify_host ();
5483  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5484  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5485  numPacketsPerLID_h.extent (0));
5486  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5487  constantNumPackets);
5488  const size_t newSize = static_cast<size_t> (exports_a.size ());
5489  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5490  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5491  exports = exports_dv_type ("exports", newSize);
5492  }
5493  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5494  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5495  exports.clear_sync_state ();
5496  exports.modify_host ();
5497  // DEEP_COPY REVIEW - NOT TESTED
5498  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5499  }
5500  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5501  else if (! getColMap ().is_null () &&
5502  (rowPtrsPacked_dev_.extent (0) != 0 ||
5503  getRowMap ()->getLocalNumElements () == 0)) {
5504  if (verbose) {
5505  std::ostringstream os;
5506  os << *prefix << "packCrsGraphNew path" << endl;
5507  std::cerr << os.str();
5508  }
5509  using export_pids_type =
5510  Kokkos::DualView<const int*, buffer_device_type>;
5511  export_pids_type exportPIDs; // not filling it; needed for syntax
5512  using LO = local_ordinal_type;
5513  using NT = node_type;
5515  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5516  exports, numPacketsPerLID,
5517  constantNumPackets, false);
5518  }
5519  else {
5520  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5521  constantNumPackets);
5522  }
5523 
5524  if (verbose) {
5525  std::ostringstream os;
5526  os << *prefix << "Done" << endl;
5527  std::cerr << os.str();
5528  }
5529  }
5530 
5531  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5532  void
5534  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5535  Teuchos::Array<GlobalOrdinal>& exports,
5536  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5537  size_t& constantNumPackets) const
5538  {
5539  auto col_map = this->getColMap();
5540  // packCrsGraph requires k_rowPtrsPacked to be set
5541  if( !col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5543  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5544  exportLIDs, constantNumPackets);
5545  }
5546  else {
5547  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5548  constantNumPackets);
5549  }
5550  }
5551 
5552  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5553  void
5555  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5556  Teuchos::Array<GlobalOrdinal>& exports,
5557  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5558  size_t& constantNumPackets) const
5559  {
5560  using std::endl;
5561  using LO = LocalOrdinal;
5562  using GO = GlobalOrdinal;
5563  using host_execution_space =
5564  typename Kokkos::View<size_t*, device_type>::
5565  HostMirror::execution_space;
5566  const char tfecfFuncName[] = "packFillActive: ";
5567  const bool verbose = verbose_;
5568 
5569  const auto numExportLIDs = exportLIDs.size ();
5570  std::unique_ptr<std::string> prefix;
5571  if (verbose) {
5572  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5573  std::ostringstream os;
5574  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5575  std::cerr << os.str();
5576  }
5577  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5578  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5579  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5580  " = " << numPacketsPerLID.size () << ".");
5581 
5582  const map_type& rowMap = * (this->getRowMap ());
5583  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5584  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5585  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5586  "This graph claims to be locally indexed, but its column Map is nullptr. "
5587  "This should never happen. Please report this bug to the Tpetra "
5588  "developers.");
5589 
5590  // We may pack different amounts of data for different rows.
5591  constantNumPackets = 0;
5592 
5593  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5594  // it might be now, but we might as well be safe).
5595  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5596  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5597 
5598  // Count the total number of packets (column indices, in the case
5599  // of a CrsGraph) to pack. While doing so, set
5600  // numPacketsPerLID[i] to the number of entries owned by the
5601  // calling process in (local) row exportLIDs[i] of the graph, that
5602  // the caller wants us to send out.
5603  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5604  size_t totalNumPackets = 0;
5605  size_t errCount = 0;
5606  // lambdas turn what they capture const, so we can't
5607  // atomic_add(&errCount,1). Instead, we need a View to modify.
5608  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5609  host_device_type;
5610  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5611  constexpr size_t ONE = 1;
5612 
5613  execute_sync_host_uvm_access(); // protect host UVM access
5614  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5615  inputRange,
5616  [=] (const LO& i, size_t& curTotalNumPackets) {
5617  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5618  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5619  Kokkos::atomic_add (&errCountView(), ONE);
5620  numPacketsPerLID_raw[i] = 0;
5621  }
5622  else {
5623  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5624  numPacketsPerLID_raw[i] = numEnt;
5625  curTotalNumPackets += numEnt;
5626  }
5627  },
5628  totalNumPackets);
5629 
5630  if (verbose) {
5631  std::ostringstream os;
5632  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5633  std::cerr << os.str();
5634  }
5635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5636  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5637  "one or more errors! errCount = " << errCount
5638  << ", totalNumPackets = " << totalNumPackets << ".");
5639  errCount = 0;
5640 
5641  // Allocate space for all the column indices to pack.
5642  exports.resize (totalNumPackets);
5643 
5644  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645  (! this->supportsRowViews (), std::logic_error,
5646  "this->supportsRowViews() returns false; this should never happen. "
5647  "Please report this bug to the Tpetra developers.");
5648 
5649  // Loop again over the rows to export, and pack rows of indices
5650  // into the output buffer.
5651 
5652  if (verbose) {
5653  std::ostringstream os;
5654  os << *prefix << "Pack into exports" << endl;
5655  std::cerr << os.str();
5656  }
5657 
5658  // Teuchos::ArrayView may not be thread safe, or may not be
5659  // efficiently thread safe. Better to use the raw pointer.
5660  GO* const exports_raw = exports.getRawPtr ();
5661  errCount = 0;
5662  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5663  inputRange, [=, &prefix]
5664  (const LO i, size_t& exportsOffset, const bool final) {
5665  const size_t curOffset = exportsOffset;
5666  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5667  const RowInfo rowInfo =
5668  this->getRowInfoFromGlobalRowIndex (gblRow);
5669 
5670  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5671  if (rowInfo.localRow == TDO::invalid ()) {
5672  if (verbose) {
5673  std::ostringstream os;
5674  os << *prefix << ": INVALID rowInfo: i=" << i
5675  << ", lclRow=" << exportLIDs_raw[i] << endl;
5676  std::cerr << os.str();
5677  }
5678  Kokkos::atomic_add (&errCountView(), ONE);
5679  }
5680  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5681  if (verbose) {
5682  std::ostringstream os;
5683  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5684  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5685  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5686  << ") > totalNumPackets (= " << totalNumPackets << ")."
5687  << endl;
5688  std::cerr << os.str();
5689  }
5690  Kokkos::atomic_add (&errCountView(), ONE);
5691  }
5692  else {
5693  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5694  if (this->isLocallyIndexed ()) {
5695  auto lclColInds = getLocalIndsViewHost (rowInfo);
5696  if (final) {
5697  for (LO k = 0; k < numEnt; ++k) {
5698  const LO lclColInd = lclColInds(k);
5699  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5700  // Pack it, even if it's wrong. Let the receiving
5701  // process deal with it. Otherwise, we'll miss out
5702  // on any correct data.
5703  exports_raw[curOffset + k] = gblColInd;
5704  } // for each entry in the row
5705  } // final pass?
5706  exportsOffset = curOffset + numEnt;
5707  }
5708  else if (this->isGloballyIndexed ()) {
5709  auto gblColInds = getGlobalIndsViewHost (rowInfo);
5710  if (final) {
5711  for (LO k = 0; k < numEnt; ++k) {
5712  const GO gblColInd = gblColInds(k);
5713  // Pack it, even if it's wrong. Let the receiving
5714  // process deal with it. Otherwise, we'll miss out
5715  // on any correct data.
5716  exports_raw[curOffset + k] = gblColInd;
5717  } // for each entry in the row
5718  } // final pass?
5719  exportsOffset = curOffset + numEnt;
5720  }
5721  // If neither globally nor locally indexed, then the graph
5722  // has no entries in this row (or indeed, in any row on this
5723  // process) to pack.
5724  }
5725  });
5726 
5727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5728  (errCount != 0, std::logic_error, "Packing encountered "
5729  "one or more errors! errCount = " << errCount
5730  << ", totalNumPackets = " << totalNumPackets << ".");
5731 
5732  if (verbose) {
5733  std::ostringstream os;
5734  os << *prefix << "Done" << endl;
5735  std::cerr << os.str();
5736  }
5737  }
5738 
5739  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5740  void
5741  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5742  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5743  buffer_device_type>& exportLIDs,
5744  Kokkos::DualView<packet_type*,
5745  buffer_device_type>& exports,
5746  Kokkos::DualView<size_t*,
5747  buffer_device_type> numPacketsPerLID,
5748  size_t& constantNumPackets) const
5749  {
5750  using std::endl;
5751  using LO = local_ordinal_type;
5752  using GO = global_ordinal_type;
5753  using host_execution_space = typename Kokkos::View<size_t*,
5754  device_type>::HostMirror::execution_space;
5755  using host_device_type =
5756  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5757  using exports_dv_type =
5758  Kokkos::DualView<packet_type*, buffer_device_type>;
5759  const char tfecfFuncName[] = "packFillActiveNew: ";
5760  const bool verbose = verbose_;
5761 
5762  const auto numExportLIDs = exportLIDs.extent (0);
5763  std::unique_ptr<std::string> prefix;
5764  if (verbose) {
5765  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5766  std::ostringstream os;
5767  os << *prefix << "numExportLIDs: " << numExportLIDs
5768  << ", numPacketsPerLID.extent(0): "
5769  << numPacketsPerLID.extent(0) << endl;
5770  std::cerr << os.str();
5771  }
5772  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5773  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5774  "exportLIDs.extent(0) = " << numExportLIDs
5775  << " != numPacketsPerLID.extent(0) = "
5776  << numPacketsPerLID.extent (0) << ".");
5777  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5778  auto exportLIDs_h = exportLIDs.view_host ();
5779 
5780  const map_type& rowMap = * (this->getRowMap ());
5781  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5782  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5783  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5784  "This graph claims to be locally indexed, but its column Map is nullptr. "
5785  "This should never happen. Please report this bug to the Tpetra "
5786  "developers.");
5787 
5788  // We may pack different amounts of data for different rows.
5789  constantNumPackets = 0;
5790 
5791  numPacketsPerLID.clear_sync_state ();
5792  numPacketsPerLID.modify_host ();
5793  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5794 
5795  // Count the total number of packets (column indices, in the case
5796  // of a CrsGraph) to pack. While doing so, set
5797  // numPacketsPerLID[i] to the number of entries owned by the
5798  // calling process in (local) row exportLIDs[i] of the graph, that
5799  // the caller wants us to send out.
5800  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5801  range_type inputRange (0, numExportLIDs);
5802  size_t totalNumPackets = 0;
5803  size_t errCount = 0;
5804  // lambdas turn what they capture const, so we can't
5805  // atomic_add(&errCount,1). Instead, we need a View to modify.
5806  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5807  constexpr size_t ONE = 1;
5808 
5809  if (verbose) {
5810  std::ostringstream os;
5811  os << *prefix << "Compute totalNumPackets" << endl;
5812  std::cerr << os.str ();
5813  }
5814 
5815  execute_sync_host_uvm_access(); // protect host UVM access
5816  Kokkos::parallel_reduce
5817  ("Tpetra::CrsGraph::pack: totalNumPackets",
5818  inputRange,
5819  [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
5820  const LO lclRow = exportLIDs_h[i];
5821  const GO gblRow = rowMap.getGlobalElement (lclRow);
5822  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5823  if (verbose) {
5824  std::ostringstream os;
5825  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5826  << " not in row Map on this process" << endl;
5827  std::cerr << os.str();
5828  }
5829  Kokkos::atomic_add (&errCountView(), ONE);
5830  numPacketsPerLID_h(i) = 0;
5831  }
5832  else {
5833  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5834  numPacketsPerLID_h(i) = numEnt;
5835  curTotalNumPackets += numEnt;
5836  }
5837  },
5838  totalNumPackets);
5839 
5840  if (verbose) {
5841  std::ostringstream os;
5842  os << *prefix << "totalNumPackets: " << totalNumPackets
5843  << ", errCount: " << errCount << endl;
5844  std::cerr << os.str ();
5845  }
5846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5847  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5848  "one or more errors! totalNumPackets: " << totalNumPackets
5849  << ", errCount: " << errCount << ".");
5850 
5851  // Allocate space for all the column indices to pack.
5852  if (size_t(exports.extent (0)) < totalNumPackets) {
5853  // FIXME (mfh 09 Apr 2019) Create without initializing.
5854  exports = exports_dv_type ("exports", totalNumPackets);
5855  }
5856 
5857  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5858  (! this->supportsRowViews (), std::logic_error,
5859  "this->supportsRowViews() returns false; this should never happen. "
5860  "Please report this bug to the Tpetra developers.");
5861 
5862  // Loop again over the rows to export, and pack rows of indices
5863  // into the output buffer.
5864 
5865  if (verbose) {
5866  std::ostringstream os;
5867  os << *prefix << "Pack into exports buffer" << endl;
5868  std::cerr << os.str();
5869  }
5870 
5871  exports.clear_sync_state ();
5872  exports.modify_host ();
5873  auto exports_h = exports.view_host ();
5874 
5875  errCount = 0;
5876  Kokkos::parallel_scan
5877  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5878  inputRange, [=, &prefix]
5879  (const LO i, size_t& exportsOffset, const bool final) {
5880  const size_t curOffset = exportsOffset;
5881  const LO lclRow = exportLIDs_h(i);
5882  const GO gblRow = rowMap.getGlobalElement (lclRow);
5883  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5884  if (verbose) {
5885  std::ostringstream os;
5886  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5887  << " not in row Map on this process" << endl;
5888  std::cerr << os.str();
5889  }
5890  Kokkos::atomic_add (&errCountView(), ONE);
5891  return;
5892  }
5893 
5894  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5895  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5896  if (verbose) {
5897  std::ostringstream os;
5898  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5899  << ", gblRow=" << gblRow << ": invalid rowInfo"
5900  << endl;
5901  std::cerr << os.str();
5902  }
5903  Kokkos::atomic_add (&errCountView(), ONE);
5904  return;
5905  }
5906 
5907  if (curOffset + rowInfo.numEntries > totalNumPackets) {
5908  if (verbose) {
5909  std::ostringstream os;
5910  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5911  << ", gblRow=" << gblRow << ", curOffset (= "
5912  << curOffset << ") + numEnt (= " << rowInfo.numEntries
5913  << ") > totalNumPackets (= " << totalNumPackets
5914  << ")." << endl;
5915  std::cerr << os.str();
5916  }
5917  Kokkos::atomic_add (&errCountView(), ONE);
5918  return;
5919  }
5920 
5921  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5922  if (this->isLocallyIndexed ()) {
5923  auto lclColInds = getLocalIndsViewHost(rowInfo);
5924  if (final) {
5925  for (LO k = 0; k < numEnt; ++k) {
5926  const LO lclColInd = lclColInds(k);
5927  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5928  // Pack it, even if it's wrong. Let the receiving
5929  // process deal with it. Otherwise, we'll miss out
5930  // on any correct data.
5931  exports_h(curOffset + k) = gblColInd;
5932  } // for each entry in the row
5933  } // final pass?
5934  exportsOffset = curOffset + numEnt;
5935  }
5936  else if (this->isGloballyIndexed ()) {
5937  auto gblColInds = getGlobalIndsViewHost(rowInfo);
5938  if (final) {
5939  for (LO k = 0; k < numEnt; ++k) {
5940  const GO gblColInd = gblColInds(k);
5941  // Pack it, even if it's wrong. Let the receiving
5942  // process deal with it. Otherwise, we'll miss out
5943  // on any correct data.
5944  exports_h(curOffset + k) = gblColInd;
5945  } // for each entry in the row
5946  } // final pass?
5947  exportsOffset = curOffset + numEnt;
5948  }
5949  // If neither globally nor locally indexed, then the graph
5950  // has no entries in this row (or indeed, in any row on this
5951  // process) to pack.
5952  });
5953 
5954  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5955  // (errCount != 0, std::logic_error, "Packing encountered "
5956  // "one or more errors! errCount = " << errCount
5957  // << ", totalNumPackets = " << totalNumPackets << ".");
5958 
5959  if (verbose) {
5960  std::ostringstream os;
5961  os << *prefix << "errCount=" << errCount << "; Done" << endl;
5962  std::cerr << os.str();
5963  }
5964  }
5965 
5966  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5967  void
5968  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5969  unpackAndCombine
5970  (const Kokkos::DualView<const local_ordinal_type*,
5971  buffer_device_type>& importLIDs,
5972  Kokkos::DualView<packet_type*,
5973  buffer_device_type> imports,
5974  Kokkos::DualView<size_t*,
5975  buffer_device_type> numPacketsPerLID,
5976  const size_t /* constantNumPackets */,
5977  const CombineMode /* combineMode */ )
5978  {
5979  using Details::ProfilingRegion;
5980  using std::endl;
5981  using LO = local_ordinal_type;
5982  using GO = global_ordinal_type;
5983  const char tfecfFuncName[] = "unpackAndCombine";
5984 
5985  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5986  const bool verbose = verbose_;
5987 
5988  std::unique_ptr<std::string> prefix;
5989  if (verbose) {
5990  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5991  std::ostringstream os;
5992  os << *prefix << "Start" << endl;
5993  std::cerr << os.str ();
5994  }
5995  {
5996  auto padding = computeCrsPaddingForImports(
5997  importLIDs, imports, numPacketsPerLID, verbose);
5998  applyCrsPadding(*padding, verbose);
5999  if (verbose) {
6000  std::ostringstream os;
6001  os << *prefix << "Done computing & applying padding" << endl;
6002  std::cerr << os.str ();
6003  }
6004  }
6005 
6006  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6007  // reasonable meaning, whether or not the matrix is fill complete.
6008  // It's just more work to implement.
6009 
6010  // We are not checking the value of the CombineMode input
6011  // argument. For CrsGraph, we only support import/export
6012  // operations if fillComplete has not yet been called. Any
6013  // incoming column-indices are inserted into the target graph. In
6014  // this context, CombineMode values of ADD vs INSERT are
6015  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6016  // duplicate column-index is inserted, it will be compressed out
6017  // when fillComplete is called.
6018  //
6019  // Note: I think REPLACE means that an existing row is replaced by
6020  // the imported row, i.e., the existing indices are cleared. CGB,
6021  // 6/17/2010
6022 
6023  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6024  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6025  std::runtime_error, ": importLIDs.extent(0) = "
6026  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6027  << numPacketsPerLID.extent (0) << ".");
6028  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6029  (isFillComplete (), std::runtime_error,
6030  ": Import or Export operations are not allowed on a target "
6031  "CrsGraph that is fillComplete.");
6032 
6033  const size_t numImportLIDs(importLIDs.extent(0));
6034  if (numPacketsPerLID.need_sync_host()) {
6035  numPacketsPerLID.sync_host();
6036  }
6037  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6038  if (imports.need_sync_host()) {
6039  imports.sync_host();
6040  }
6041  auto imports_h = imports.view_host();
6042  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6043  auto importLIDs_h = importLIDs.view_host();
6044 
6045  // If we're inserting in local indices, let's pre-allocate
6046  Teuchos::Array<LO> lclColInds;
6047  if (isLocallyIndexed()) {
6048  if (verbose) {
6049  std::ostringstream os;
6050  os << *prefix << "Preallocate local indices scratch" << endl;
6051  std::cerr << os.str();
6052  }
6053  size_t maxNumInserts = 0;
6054  for (size_t i = 0; i < numImportLIDs; ++i) {
6055  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6056  }
6057  if (verbose) {
6058  std::ostringstream os;
6059  os << *prefix << "Local indices scratch size: "
6060  << maxNumInserts << endl;
6061  std::cerr << os.str();
6062  }
6063  lclColInds.resize (maxNumInserts);
6064  }
6065  else {
6066  if (verbose) {
6067  std::ostringstream os;
6068  os << *prefix;
6069  if (isGloballyIndexed()) {
6070  os << "Graph is globally indexed";
6071  }
6072  else {
6073  os << "Graph is neither locally nor globally indexed";
6074  }
6075  os << endl;
6076  std::cerr << os.str();
6077  }
6078  }
6079 
6080  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6081  const map_type& rowMap = *rowMap_;
6082 
6083  try {
6084  size_t importsOffset = 0;
6085  for (size_t i = 0; i < numImportLIDs; ++i) {
6086  if (verbose) {
6087  std::ostringstream os;
6088  os << *prefix << "i=" << i << ", numImportLIDs="
6089  << numImportLIDs << endl;
6090  std::cerr << os.str();
6091  }
6092  // We can only unpack into owned rows, since we only have
6093  // local row indices.
6094  const LO lclRow = importLIDs_h[i];
6095  const GO gblRow = rowMap.getGlobalElement(lclRow);
6096  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6097  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6098  std::logic_error, "importLIDs[i=" << i << "]="
6099  << lclRow << " is not in the row Map on the calling "
6100  "process.");
6101  const LO numEnt = numPacketsPerLID_h[i];
6102  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6103  imports_h.data() + importsOffset;
6104  if (! isLocallyIndexed()) {
6105  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6106  }
6107  else {
6108  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6109  // column Map filtering.
6110  for (LO j = 0; j < numEnt; j++) {
6111  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6112  }
6113  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6114  }
6115  importsOffset += numEnt;
6116  }
6117  }
6118  catch (std::exception& e) {
6119  TEUCHOS_TEST_FOR_EXCEPTION
6120  (true, std::runtime_error,
6121  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6122  "exception: " << endl << e.what());
6123  }
6124 
6125  if (verbose) {
6126  std::ostringstream os;
6127  os << *prefix << "Done" << endl;
6128  std::cerr << os.str();
6129  }
6130  }
6131 
6132  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6133  void
6135  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6136  {
6137  using Teuchos::Comm;
6138  using Teuchos::null;
6139  using Teuchos::ParameterList;
6140  using Teuchos::RCP;
6141 
6142  // We'll set all the state "transactionally," so that this method
6143  // satisfies the strong exception guarantee. This object's state
6144  // won't be modified until the end of this method.
6145  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6146  RCP<import_type> importer;
6147  RCP<export_type> exporter;
6148 
6149  rowMap = newMap;
6150  RCP<const Comm<int> > newComm =
6151  (newMap.is_null ()) ? null : newMap->getComm ();
6152 
6153  if (! domainMap_.is_null ()) {
6154  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6155  // Common case: original domain and row Maps are identical.
6156  // In that case, we need only replace the original domain Map
6157  // with the new Map. This ensures that the new domain and row
6158  // Maps _stay_ identical.
6159  domainMap = newMap;
6160  } else {
6161  domainMap = domainMap_->replaceCommWithSubset (newComm);
6162  }
6163  }
6164  if (! rangeMap_.is_null ()) {
6165  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6166  // Common case: original range and row Maps are identical. In
6167  // that case, we need only replace the original range Map with
6168  // the new Map. This ensures that the new range and row Maps
6169  // _stay_ identical.
6170  rangeMap = newMap;
6171  } else {
6172  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6173  }
6174  }
6175  if (! colMap_.is_null ()) {
6176  colMap = colMap_->replaceCommWithSubset (newComm);
6177  }
6178 
6179  // (Re)create the Export and / or Import if necessary.
6180  if (! newComm.is_null ()) {
6181  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6182  //
6183  // The operations below are collective on the new communicator.
6184  //
6185  // (Re)create the Export object if necessary. If I haven't
6186  // called fillComplete yet, I don't have a rangeMap, so I must
6187  // first check if the _original_ rangeMap is not null. Ditto
6188  // for the Import object and the domain Map.
6189  if (! rangeMap_.is_null () &&
6190  rangeMap != rowMap &&
6191  ! rangeMap->isSameAs (*rowMap)) {
6192  if (params.is_null () || ! params->isSublist ("Export")) {
6193  exporter = rcp (new export_type (rowMap, rangeMap));
6194  }
6195  else {
6196  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6197  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6198  }
6199  }
6200  // (Re)create the Import object if necessary.
6201  if (! domainMap_.is_null () &&
6202  domainMap != colMap &&
6203  ! domainMap->isSameAs (*colMap)) {
6204  if (params.is_null () || ! params->isSublist ("Import")) {
6205  importer = rcp (new import_type (domainMap, colMap));
6206  } else {
6207  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6208  importer = rcp (new import_type (domainMap, colMap, importSublist));
6209  }
6210  }
6211  } // if newComm is not null
6212 
6213  // Defer side effects until the end. If no destructors throw
6214  // exceptions (they shouldn't anyway), then this method satisfies
6215  // the strong exception guarantee.
6216  exporter_ = exporter;
6217  importer_ = importer;
6218  rowMap_ = rowMap;
6219  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6220  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6221  // the same object. We might want to get rid of this redundant
6222  // pointer sometime, but for now, we'll leave it alone and just
6223  // set map_ to the same object.
6224  this->map_ = rowMap;
6225  domainMap_ = domainMap;
6226  rangeMap_ = rangeMap;
6227  colMap_ = colMap;
6228  }
6229 
6230  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6231  void
6233  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6234  {
6235  using std::endl;
6236  using LO = LocalOrdinal;
6237  using GO = GlobalOrdinal;
6238  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6239  const bool verbose = verbose_;
6240 
6241  std::unique_ptr<std::string> prefix;
6242  if (verbose) {
6243  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6244  std::ostringstream os;
6245  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6246  << endl;
6247  std::cerr << os.str();
6248  }
6249 
6250  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6251  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6252  const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6253  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6254  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6255  std::invalid_argument, "offsets.extent(0) = " <<
6256  offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6257 
6258  const map_type& rowMap = * (this->getRowMap ());
6259  const map_type& colMap = * (this->getColMap ());
6260 
6261  // We only use these in debug mode, but since debug mode is a
6262  // run-time option, they need to exist here. That's why we create
6263  // the vector with explicit size zero, to avoid overhead if debug
6264  // mode is off.
6265  bool allRowMapDiagEntriesInColMap = true;
6266  bool allDiagEntriesFound = true;
6267  bool allOffsetsCorrect = true;
6268  bool noOtherWeirdness = true;
6269  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6270  wrong_offsets_type wrongOffsets(0);
6271 
6272  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6273  // the subset of Map functionality that we need below.
6274  auto lclRowMap = rowMap.getLocalMap ();
6275  auto lclColMap = colMap.getLocalMap ();
6276 
6277  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6278  // setup, at least on the host. For CUDA, we have to use LocalMap
6279  // (that comes from each of the two Maps).
6280 
6281  const bool sorted = this->isSorted ();
6282  if (isFillComplete ()) {
6283  auto lclGraph = this->getLocalGraphDevice ();
6284  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6285  lclGraph.row_map,
6286  lclGraph.entries, sorted);
6287  }
6288  else {
6289  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6290  // since the graph is not fill complete. The previous version
6291  // of this code assumed UVM; this version does not.
6292  auto offsets_h = Kokkos::create_mirror_view (offsets);
6293 
6294  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6295  // Find the diagonal entry. Since the row Map and column Map
6296  // may differ, we have to compare global row and column
6297  // indices, not local.
6298  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6299  const GO gblColInd = gblRowInd;
6300  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6301 
6302  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6303  allRowMapDiagEntriesInColMap = false;
6304  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6305  }
6306  else {
6307  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6308  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6309  rowInfo.numEntries > 0) {
6310 
6311  auto colInds = this->getLocalIndsViewHost (rowInfo);
6312  const size_t hint = 0; // not needed for this algorithm
6313  const size_t offset =
6314  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6315  lclColInd, hint, sorted);
6316  offsets_h(lclRowInd) = offset;
6317 
6318  if (debug_) {
6319  // Now that we have what we think is an offset, make sure
6320  // that it really does point to the diagonal entry. Offsets
6321  // are _relative_ to each row, not absolute (for the whole
6322  // (local) graph).
6323  typename local_inds_dualv_type::t_host::const_type lclColInds;
6324  try {
6325  lclColInds = this->getLocalIndsViewHost (rowInfo);
6326  }
6327  catch (...) {
6328  noOtherWeirdness = false;
6329  }
6330  // Don't continue with error checking if the above failed.
6331  if (noOtherWeirdness) {
6332  const size_t numEnt = lclColInds.extent (0);
6333  if (offset >= numEnt) {
6334  // Offsets are relative to each row, so this means that
6335  // the offset is out of bounds.
6336  allOffsetsCorrect = false;
6337  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6338  } else {
6339  const LO actualLclColInd = lclColInds(offset);
6340  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6341  if (actualGblColInd != gblColInd) {
6342  allOffsetsCorrect = false;
6343  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6344  }
6345  }
6346  }
6347  } // debug_
6348  }
6349  else { // either row is empty, or something went wrong w/ getRowInfo()
6350  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6351  allDiagEntriesFound = false;
6352  }
6353  } // whether lclColInd is a valid local column index
6354  } // for each local row
6355  // DEEP_COPY REVIEW - NOT TESTED
6356  Kokkos::deep_copy (offsets, offsets_h);
6357  } // whether the graph is fill complete
6358 
6359  if (verbose && wrongOffsets.size () != 0) {
6360  std::ostringstream os;
6361  os << *prefix << "Wrong offsets: [";
6362  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6363  os << "(" << wrongOffsets[k].first << ","
6364  << wrongOffsets[k].second << ")";
6365  if (k + 1 < wrongOffsets.size ()) {
6366  os << ", ";
6367  }
6368  }
6369  os << "]" << endl;
6370  std::cerr << os.str();
6371  }
6372 
6373  if (debug_) {
6374  using Teuchos::reduceAll;
6375  using std::endl;
6376  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6377  const bool localSuccess =
6378  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6379  const int numResults = 5;
6380  int lclResults[5];
6381  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6382  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6383  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6384  lclResults[3] = noOtherWeirdness ? 1 : 0;
6385  // min-all-reduce will compute least rank of all the processes
6386  // that didn't succeed.
6387  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6388 
6389  int gblResults[5];
6390  gblResults[0] = 0;
6391  gblResults[1] = 0;
6392  gblResults[2] = 0;
6393  gblResults[3] = 0;
6394  gblResults[4] = 0;
6395  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6396  numResults, lclResults, gblResults);
6397 
6398  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6399  || gblResults[3] != 1) {
6400  std::ostringstream os; // build error message
6401  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6402  "possibly among others): " << endl;
6403  if (gblResults[0] == 0) {
6404  os << " - The column Map does not contain at least one diagonal entry "
6405  "of the graph." << endl;
6406  }
6407  if (gblResults[1] == 0) {
6408  os << " - On one or more processes, some row does not contain a "
6409  "diagonal entry." << endl;
6410  }
6411  if (gblResults[2] == 0) {
6412  os << " - On one or more processes, some offsets are incorrect."
6413  << endl;
6414  }
6415  if (gblResults[3] == 0) {
6416  os << " - One or more processes had some other error."
6417  << endl;
6418  }
6419  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6420  }
6421  } // debug_
6422  }
6423 
6424  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6425  void
6427  getLocalOffRankOffsets (offset_device_view_type& offsets) const
6428  {
6429  using std::endl;
6430  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6431  const bool verbose = verbose_;
6432 
6433  std::unique_ptr<std::string> prefix;
6434  if (verbose) {
6435  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6436  std::ostringstream os;
6437  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6438  << endl;
6439  std::cerr << os.str();
6440  }
6441 
6442  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6443  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6444  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6445 
6446  const size_t lclNumRows = this->getLocalNumRows ();
6447 
6448  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6449  offsets = k_offRankOffsets_;
6450  return;
6451  }
6452  haveLocalOffRankOffsets_ = false;
6453  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6454  offsets = k_offRankOffsets_;
6455 
6456  const map_type& colMap = * (this->getColMap ());
6457  const map_type& domMap = * (this->getDomainMap ());
6458 
6459  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6460  // the subset of Map functionality that we need below.
6461  auto lclColMap = colMap.getLocalMap ();
6462  auto lclDomMap = domMap.getLocalMap ();
6463 
6464  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6465  // setup, at least on the host. For CUDA, we have to use LocalMap
6466  // (that comes from each of the two Maps).
6467 
6468  TEUCHOS_ASSERT(this->isSorted ());
6469  if (isFillComplete ()) {
6470  auto lclGraph = this->getLocalGraphDevice ();
6471  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6472  lclColMap, lclDomMap,
6473  lclGraph);
6474  haveLocalOffRankOffsets_ = true;
6475  }
6476  }
6477 
6478  namespace { // (anonymous)
6479 
6480  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6481  // below). The point is to avoid the deep copy between the input
6482  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6483  // can't use UVM to avoid the deep copy with CUDA, because the
6484  // ArrayRCP is a host pointer, while the input to the graph's
6485  // getLocalDiagOffsets method is a device pointer. Assigning a
6486  // host pointer to a device pointer is incorrect unless the host
6487  // pointer points to host pinned memory. The goal is to get rid
6488  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6489  // copy for backwards compatibility.
6490  //
6491  // We have to use template magic because
6492  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6493  // if device_type::memory_space is not Kokkos::HostSpace (as is
6494  // the case with CUDA).
6495 
6496  template<class DeviceType,
6497  const bool memSpaceIsHostSpace =
6498  std::is_same<typename DeviceType::memory_space,
6499  Kokkos::HostSpace>::value>
6500  struct HelpGetLocalDiagOffsets {};
6501 
6502  template<class DeviceType>
6503  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6504  typedef DeviceType device_type;
6505  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6506  Kokkos::MemoryUnmanaged> device_offsets_type;
6507  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6508  Kokkos::MemoryUnmanaged> host_offsets_type;
6509 
6510  static device_offsets_type
6511  getDeviceOffsets (const host_offsets_type& hostOffsets)
6512  {
6513  // Host and device are the same; no need to allocate a
6514  // temporary device View.
6515  return hostOffsets;
6516  }
6517 
6518  static void
6519  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6520  const device_offsets_type& /* deviceOffsets */)
6521  { /* copy back not needed; host and device are the same */ }
6522  };
6523 
6524  template<class DeviceType>
6525  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6526  typedef DeviceType device_type;
6527  // We have to do a deep copy, since host memory space != device
6528  // memory space. Thus, the device View is managed (we need to
6529  // allocate a temporary device View).
6530  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6531  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6532  Kokkos::MemoryUnmanaged> host_offsets_type;
6533 
6534  static device_offsets_type
6535  getDeviceOffsets (const host_offsets_type& hostOffsets)
6536  {
6537  // Host memory space != device memory space, so we must
6538  // allocate a temporary device View for the graph.
6539  return device_offsets_type ("offsets", hostOffsets.extent (0));
6540  }
6541 
6542  static void
6543  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6544  const device_offsets_type& deviceOffsets)
6545  {
6546  // DEEP_COPY REVIEW - NOT TESTED
6547  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6548  }
6549  };
6550  } // namespace (anonymous)
6551 
6552 
6553  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6554  void
6556  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6557  {
6558  typedef LocalOrdinal LO;
6559  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6560  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6561  (! this->hasColMap (), std::runtime_error,
6562  "The graph does not yet have a column Map.");
6563  const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6564  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6565  // NOTE (mfh 21 Jan 2016) This means that the method does not
6566  // satisfy the strong exception guarantee (no side effects
6567  // unless successful).
6568  offsets.resize (myNumRows);
6569  }
6570 
6571  // mfh 21 Jan 2016: This method unfortunately takes a
6572  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6573  // device pointer. We can't access host memory from the device;
6574  // that's the wrong direction for UVM. (It's the right direction
6575  // for inefficient host pinned memory, but we don't want to use
6576  // that here.) Thus, if device memory space != host memory space,
6577  // we allocate and use a temporary device View to get the offsets.
6578  // If the two spaces are equal, the template magic makes the deep
6579  // copy go away.
6580  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6581  typedef typename helper_type::host_offsets_type host_offsets_type;
6582  // Unmanaged host View that views the output array.
6583  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6584  // Allocate temp device View if host != device, else reuse host array.
6585  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6586  // NOT recursion; this calls the overload that takes a device View.
6587  this->getLocalDiagOffsets (deviceOffsets);
6588  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6589  }
6590 
6591  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6592  bool
6595  return true;
6596  }
6597 
6598  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6599  void
6602  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6603  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6604  const Teuchos::RCP<const map_type>& domainMap,
6605  const Teuchos::RCP<const map_type>& rangeMap,
6606  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6607  {
6612  using Teuchos::ArrayRCP;
6613  using Teuchos::ArrayView;
6614  using Teuchos::Comm;
6615  using Teuchos::ParameterList;
6616  using Teuchos::rcp;
6617  using Teuchos::RCP;
6618 #ifdef HAVE_TPETRA_MMM_TIMINGS
6619  using std::string;
6620  using Teuchos::TimeMonitor;
6621 #endif
6622 
6623  using LO = LocalOrdinal;
6624  using GO = GlobalOrdinal;
6625  using NT = node_type;
6626  using this_CRS_type = CrsGraph<LO, GO, NT>;
6627  using ivector_type = Vector<int, LO, GO, NT>;
6628 
6629  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6630 
6631 #ifdef HAVE_TPETRA_MMM_TIMINGS
6632  string label;
6633  if(!params.is_null()) label = params->get("Timer Label", label);
6634  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6635  RCP<TimeMonitor> MM =
6636  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6637 #endif
6638 <