Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
50 
55 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
56 #include "Tpetra_Details_makeColMap.hpp"
60 #include "Tpetra_Distributor.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Tpetra_Vector.hpp"
63 #include "Tpetra_Import_Util.hpp"
64 #include "Tpetra_Import_Util2.hpp"
65 #include "Tpetra_Details_packCrsGraph.hpp"
66 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
67 #include "Tpetra_Details_CrsPadding.hpp"
68 #include "Tpetra_Util.hpp"
69 #include <algorithm>
70 #include <limits>
71 #include <map>
72 #include <sstream>
73 #include <string>
74 #include <type_traits>
75 #include <utility>
76 #include <vector>
77 
78 namespace Tpetra {
79  namespace Details {
80  namespace Impl {
81 
// Write the (key, value) pairs of the iterator range [beg, end) to
// `out`, formatted as "<mapName>: {(key, <values>), ...}".
//
// out:      stream that receives the text.
// beg, end: iterator range; each element must expose `.first` and
//           `.second` (the latter is passed to verbosePrintArray).
// numEnt:   entry count supplied by the caller; it is compared with
//           the print limit and with the number of pairs visited.
// mapName:  label written before the opening brace.
//
// NOTE(review): this listing skips original source lines 91 and 95,
// so the initializer of maxNumToPrint is not visible here -- confirm
// against the real file.
82  template<class MapIter>
83  void
84  verbosePrintMap(std::ostream& out,
85  MapIter beg,
86  MapIter end,
87  const size_t numEnt,
88  const char mapName[])
89  {
90  using ::Tpetra::Details::Behavior;
92 
93  out << mapName << ": {";
94  const size_t maxNumToPrint =
96  if (maxNumToPrint == 0) {
  // A limit of zero prints no pairs; "..." stands in for whatever
  // entries exist.
97  if (numEnt != 0) {
98  out << "...";
99  }
100  }
101  else {
  // numToPrint = min(numEnt, maxNumToPrint).
102  const size_t numToPrint = numEnt > maxNumToPrint ?
103  maxNumToPrint : numEnt;
104  size_t count = 0;
  // NOTE(review): the loop walks the whole range; numToPrint only
  // decides whether a ", " separator follows a pair.  Confirm that
  // visiting pairs past numToPrint is intended.
105  for (MapIter it = beg; it != end; ++it) {
106  out << "(" << (*it).first << ", ";
107  verbosePrintArray(out, (*it).second, "gblColInds",
108  maxNumToPrint);
109  out << ")";
110  if (count + size_t(1) < numToPrint) {
111  out << ", ";
112  }
113  ++count;
114  }
  // Fewer pairs visited than the caller-reported total: mark the
  // output as truncated.
115  if (count < numEnt) {
116  out << ", ...";
117  }
118  }
119  out << "}";
120  }
121 
122  template<class LO, class GO, class Node>
123  Teuchos::ArrayView<GO>
124  getRowGraphGlobalRow(
125  std::vector<GO>& gblColIndsStorage,
126  const RowGraph<LO, GO, Node>& graph,
127  const GO gblRowInd)
128  {
129  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
130  if (gblColIndsStorage.size() < origNumEnt) {
131  gblColIndsStorage.resize(origNumEnt);
132  }
133  Teuchos::ArrayView<GO> gblColInds(gblColIndsStorage.data(),
134  origNumEnt);
135  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
136  return gblColInds;
137  }
138 
139  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
140  class ConvertColumnIndicesFromGlobalToLocal {
141  public:
142  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
143  const ::Kokkos::View<const GO*, DT>& gblColInds,
144  const ::Kokkos::View<const OffsetType*, DT>& ptr,
145  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
146  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
147  lclColInds_ (lclColInds),
148  gblColInds_ (gblColInds),
149  ptr_ (ptr),
150  lclColMap_ (lclColMap),
151  numRowEnt_ (numRowEnt)
152  {}
153 
154  KOKKOS_FUNCTION void
155  operator () (const LO& lclRow, OffsetType& curNumBad) const
156  {
157  const OffsetType offset = ptr_(lclRow);
158  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
159  // of entries in a row to LO, as long as the row doesn't have
160  // too many duplicate entries.
161  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
162  for (LO j = 0; j < numEnt; ++j) {
163  const GO gid = gblColInds_(offset + j);
164  const LO lid = lclColMap_.getLocalElement (gid);
165  lclColInds_(offset + j) = lid;
166  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
167  ++curNumBad;
168  }
169  }
170  }
171 
172  static OffsetType
173  run (const ::Kokkos::View<LO*, DT>& lclColInds,
174  const ::Kokkos::View<const GO*, DT>& gblColInds,
175  const ::Kokkos::View<const OffsetType*, DT>& ptr,
176  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
177  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
178  {
179  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
180  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
181 
182  const LO lclNumRows = ptr.extent (0) == 0 ?
183  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
184  OffsetType numBad = 0;
185  // Count of "bad" column indices is a reduction over rows.
186  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
187  functor_type (lclColInds, gblColInds, ptr,
188  lclColMap, numRowEnt),
189  numBad);
190  return numBad;
191  }
192 
193  private:
194  ::Kokkos::View<LO*, DT> lclColInds_;
195  ::Kokkos::View<const GO*, DT> gblColInds_;
196  ::Kokkos::View<const OffsetType*, DT> ptr_;
198  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
199  };
200 
201  } // namespace Impl
202 
217  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
218  OffsetType
219  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
220  const Kokkos::View<const GO*, DT>& gblColInds,
221  const Kokkos::View<const OffsetType*, DT>& ptr,
222  const LocalMap<LO, GO, DT>& lclColMap,
223  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
224  {
225  using Impl::ConvertColumnIndicesFromGlobalToLocal;
226  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
227  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
228  }
229 
230  template<class ViewType, class LO>
231  class MaxDifference {
232  public:
233  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
234 
235  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
236  dst = 0;
237  }
238 
239  KOKKOS_INLINE_FUNCTION void
240  join (volatile LO& dst, const volatile LO& src) const
241  {
242  dst = (src > dst) ? src : dst;
243  }
244 
245  KOKKOS_INLINE_FUNCTION void
246  operator () (const LO lclRow, LO& maxNumEnt) const
247  {
248  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
249  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
250  }
251  private:
252  typename ViewType::const_type ptr_;
253  };
254 
255  template<class ViewType, class LO>
256  typename ViewType::non_const_value_type
257  maxDifference (const char kernelLabel[],
258  const ViewType& ptr,
259  const LO lclNumRows)
260  {
261  if (lclNumRows == 0) {
262  // mfh 07 May 2018: Weirdly, I need this special case,
263  // otherwise I get the wrong answer.
264  return static_cast<LO> (0);
265  }
266  else {
267  using execution_space = typename ViewType::execution_space;
268  using range_type = Kokkos::RangePolicy<execution_space, LO>;
269  LO theMaxNumEnt {0};
270  Kokkos::parallel_reduce (kernelLabel,
271  range_type (0, lclNumRows),
272  MaxDifference<ViewType, LO> (ptr),
273  theMaxNumEnt);
274  return theMaxNumEnt;
275  }
276  }
277 
278  } // namespace Details
279 
280  template <class LocalOrdinal, class GlobalOrdinal, class Node>
281  bool
282  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
283  getDebug() {
284  return Details::Behavior::debug("CrsGraph");
285  }
286 
287  template <class LocalOrdinal, class GlobalOrdinal, class Node>
288  bool
289  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
290  getVerbose() {
291  return Details::Behavior::verbose("CrsGraph");
292  }
293 
// Constructor: row Map plus one allocation hint shared by all rows.
//
// rowMap:              row Map; also passed to the dist_object_type base.
// maxNumEntriesPerRow: allocation hint stored in numAllocForAllRows_.
// (unnamed ProfileType): accepted but not used.
// params:              forwarded to resumeFill().
//
// Raises std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if maxNumEntriesPerRow equals
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source line 313 (between resumeFill and the
// closing brace) is absent from this listing.
294  template <class LocalOrdinal, class GlobalOrdinal, class Node>
295  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
296  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
297  const size_t maxNumEntriesPerRow,
298  const ProfileType /* pftype */,
299  const Teuchos::RCP<Teuchos::ParameterList>& params) :
300  dist_object_type (rowMap)
301  , rowMap_ (rowMap)
302  , numAllocForAllRows_ (maxNumEntriesPerRow)
303  {
  // Prefix picked up by the exception macro below.
304  const char tfecfFuncName[] =
305  "CrsGraph(rowMap,maxNumEntriesPerRow,pftype,params): ";
306  staticAssertions ();
307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
308  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
309  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
310  "a valid size_t value, which in this case means it must not be "
311  "Teuchos::OrdinalTraits<size_t>::invalid().");
312  resumeFill (params);
314  }
315 
// Constructor: row Map, column Map, and one allocation hint shared by
// all rows.
//
// rowMap:              row Map; also passed to the dist_object_type base.
// colMap:              column Map stored in colMap_.
// maxNumEntriesPerRow: allocation hint stored in numAllocForAllRows_.
// (unnamed ProfileType): accepted but not used.
// params:              forwarded to resumeFill().
//
// Raises std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if maxNumEntriesPerRow equals
// Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source lines 317 (presumably the
// "CrsGraph<...>::" qualifier) and 337 are absent from this listing.
316  template <class LocalOrdinal, class GlobalOrdinal, class Node>
318  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
319  const Teuchos::RCP<const map_type>& colMap,
320  const size_t maxNumEntriesPerRow,
321  const ProfileType /* pftype */,
322  const Teuchos::RCP<Teuchos::ParameterList>& params) :
323  dist_object_type (rowMap)
324  , rowMap_ (rowMap)
325  , colMap_ (colMap)
326  , numAllocForAllRows_ (maxNumEntriesPerRow)
327  {
  // Prefix picked up by the exception macro below.
328  const char tfecfFuncName[] =
329  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,pftype,params): ";
330  staticAssertions ();
331  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
332  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
333  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
334  "a valid size_t value, which in this case means it must not be "
335  "Teuchos::OrdinalTraits<size_t>::invalid().");
336  resumeFill (params);
338  }
339 
// Constructor: row Map plus a per-row allocation hint given as a
// Teuchos::ArrayView.
//
// rowMap:       row Map; may be null, in which case the local row
//               count is taken to be zero.
// numEntPerRow: one allocation hint per local row; deep-copied into
//               k_numAllocPerRow_.
// (unnamed ProfileType): accepted but not used.
// params:       forwarded to resumeFill().
//
// Raises std::invalid_argument if numEntPerRow's length differs from
// the local row count, or (only when debug_ is set) if any entry
// equals Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source lines 341 (presumably the
// "CrsGraph<...>::" qualifier) and 389 are absent from this listing.
340  template <class LocalOrdinal, class GlobalOrdinal, class Node>
342  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
343  const Teuchos::ArrayView<const size_t>& numEntPerRow,
344  const ProfileType /* pftype */,
345  const Teuchos::RCP<Teuchos::ParameterList>& params) :
346  dist_object_type (rowMap)
347  , rowMap_ (rowMap)
348  , numAllocForAllRows_ (0)
349  {
350  const char tfecfFuncName[] =
351  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
352  staticAssertions ();
353 
354  const size_t lclNumRows = rowMap.is_null () ?
355  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
356  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
357  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
358  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
359  << " != the local number of rows " << lclNumRows << " as specified by "
360  "the input row Map.");
361 
  // Per-entry validation is a linear host loop, so it runs only in
  // debug mode.
362  if (debug_) {
363  for (size_t r = 0; r < lclNumRows; ++r) {
364  const size_t curRowCount = numEntPerRow[r];
365  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
366  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
367  std::invalid_argument, "numEntPerRow(" << r << ") "
368  "specifies an invalid number of entries "
369  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
370  }
371  }
372 
373  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
374  // The latter is a const View, so we have to copy into a nonconst
375  // View first, then assign.
376  typedef decltype (k_numAllocPerRow_) out_view_type;
377  typedef typename out_view_type::non_const_type nc_view_type;
  // Unmanaged host View wrapping the caller's array (no ownership).
378  typedef Kokkos::View<const size_t*,
379  typename nc_view_type::array_layout,
380  Kokkos::HostSpace,
381  Kokkos::MemoryUnmanaged> in_view_type;
382  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
383  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
384  lclNumRows);
385  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
386  k_numAllocPerRow_ = numAllocPerRowOut;
387 
388  resumeFill (params);
390  }
391 
392 
393 
// Constructor: row Map plus a per-row allocation hint given as a
// Kokkos::DualView.
//
// rowMap:       row Map; may be null, in which case the local row
//               count is taken to be zero.
// numEntPerRow: one allocation hint per local row; its h_view is
//               assigned to k_numAllocPerRow_ (View assignment, not a
//               deep_copy call).
// (unnamed ProfileType): accepted but not used.
// params:       forwarded to resumeFill().
//
// Raises std::invalid_argument if numEntPerRow's extent differs from
// the local row count, or (only when debug_ is set) if any host entry
// equals Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source lines 395 (presumably the
// "CrsGraph<...>::" qualifier) and 429 are absent from this listing.
394  template <class LocalOrdinal, class GlobalOrdinal, class Node>
396  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
397  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
398  const ProfileType /* pftype */,
399  const Teuchos::RCP<Teuchos::ParameterList>& params) :
400  dist_object_type (rowMap)
401  , rowMap_ (rowMap)
402  , k_numAllocPerRow_ (numEntPerRow.h_view)
403  , numAllocForAllRows_ (0)
404  {
405  const char tfecfFuncName[] =
406  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
407  staticAssertions ();
408 
409  const size_t lclNumRows = rowMap.is_null () ?
410  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
411  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
412  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
413  std::invalid_argument, "numEntPerRow has length " <<
414  numEntPerRow.extent (0) << " != the local number of rows " <<
415  lclNumRows << " as specified by " "the input row Map.");
416 
  // Per-entry validation reads the host view and runs only in debug
  // mode.
417  if (debug_) {
418  for (size_t r = 0; r < lclNumRows; ++r) {
419  const size_t curRowCount = numEntPerRow.h_view(r);
420  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
421  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
422  std::invalid_argument, "numEntPerRow(" << r << ") "
423  "specifies an invalid number of entries "
424  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
425  }
426  }
427 
428  resumeFill (params);
430  }
431 
432 
// Constructor: row Map, column Map, and a per-row allocation hint
// given as a Kokkos::DualView.
//
// rowMap:       row Map; may be null, in which case the local row
//               count is taken to be zero.
// colMap:       column Map stored in colMap_.
// numEntPerRow: one allocation hint per local row; its h_view is
//               assigned to k_numAllocPerRow_.
// (unnamed ProfileType): accepted but not used.
// params:       forwarded to resumeFill().
//
// Raises std::invalid_argument if numEntPerRow's extent differs from
// the local row count, or (only when debug_ is set) if any host entry
// equals Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source lines 434 (presumably the
// "CrsGraph<...>::" qualifier) and 470 are absent from this listing.
433  template <class LocalOrdinal, class GlobalOrdinal, class Node>
435  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
436  const Teuchos::RCP<const map_type>& colMap,
437  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
438  const ProfileType /* pftype */,
439  const Teuchos::RCP<Teuchos::ParameterList>& params) :
440  dist_object_type (rowMap)
441  , rowMap_ (rowMap)
442  , colMap_ (colMap)
443  , k_numAllocPerRow_ (numEntPerRow.h_view)
444  , numAllocForAllRows_ (0)
445  {
446  const char tfecfFuncName[] =
447  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
448  staticAssertions ();
449 
450  const size_t lclNumRows = rowMap.is_null () ?
451  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
452  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
453  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
454  std::invalid_argument, "numEntPerRow has length " <<
455  numEntPerRow.extent (0) << " != the local number of rows " <<
456  lclNumRows << " as specified by " "the input row Map.");
457 
  // Per-entry validation reads the host view and runs only in debug
  // mode.
458  if (debug_) {
459  for (size_t r = 0; r < lclNumRows; ++r) {
460  const size_t curRowCount = numEntPerRow.h_view(r);
461  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
462  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
463  std::invalid_argument, "numEntPerRow(" << r << ") "
464  "specifies an invalid number of entries "
465  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
466  }
467  }
468 
469  resumeFill (params);
471  }
472 
473 
// Constructor: row Map, column Map, and a per-row allocation hint
// given as a Teuchos::ArrayView.
//
// rowMap:       row Map; may be null, in which case the local row
//               count is taken to be zero.
// colMap:       column Map stored in colMap_.
// numEntPerRow: one allocation hint per local row; deep-copied into
//               k_numAllocPerRow_.
// (unnamed ProfileType): accepted but not used.
// params:       forwarded to resumeFill().
//
// Raises std::invalid_argument if numEntPerRow's length differs from
// the local row count, or (only when debug_ is set) if any entry
// equals Teuchos::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): original source lines 475 (presumably the
// "CrsGraph<...>::" qualifier) and 525 are absent from this listing.
474  template <class LocalOrdinal, class GlobalOrdinal, class Node>
476  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
477  const Teuchos::RCP<const map_type>& colMap,
478  const Teuchos::ArrayView<const size_t>& numEntPerRow,
479  const ProfileType /* pftype */,
480  const Teuchos::RCP<Teuchos::ParameterList>& params) :
481  dist_object_type (rowMap)
482  , rowMap_ (rowMap)
483  , colMap_ (colMap)
484  , numAllocForAllRows_ (0)
485  {
486  const char tfecfFuncName[] =
487  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
488  staticAssertions ();
489 
490  const size_t lclNumRows = rowMap.is_null () ?
491  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
494  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
495  << " != the local number of rows " << lclNumRows << " as specified by "
496  "the input row Map.");
497 
  // Per-entry validation is a linear host loop, so it runs only in
  // debug mode.
498  if (debug_) {
499  for (size_t r = 0; r < lclNumRows; ++r) {
500  const size_t curRowCount = numEntPerRow[r];
501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
502  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
503  std::invalid_argument, "numEntPerRow(" << r << ") "
504  "specifies an invalid number of entries "
505  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
506  }
507  }
508 
509  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
510  // The latter is a const View, so we have to copy into a nonconst
511  // View first, then assign.
512  typedef decltype (k_numAllocPerRow_) out_view_type;
513  typedef typename out_view_type::non_const_type nc_view_type;
  // Unmanaged host View wrapping the caller's array (no ownership).
514  typedef Kokkos::View<const size_t*,
515  typename nc_view_type::array_layout,
516  Kokkos::HostSpace,
517  Kokkos::MemoryUnmanaged> in_view_type;
518  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
519  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
520  lclNumRows);
521  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
522  k_numAllocPerRow_ = numAllocPerRowOut;
523 
524  resumeFill (params);
526  }
527 
528 
// Constructor: row Map, column Map, and ready-made local structure
// given as Kokkos Views (row offsets and local column indices).
//
// rowMap, colMap: stored in rowMap_ / colMap_.
// rowPointers:    row offsets, handed to setAllIndices().
// columnIndices:  column indices, handed to setAllIndices().
// params:         optional; if it holds a bool "sorted" entry that is
//                 false, indicesAreSorted_ is cleared.  In every other
//                 case (null list, no entry, entry true) the indices
//                 are flagged as sorted.
//
// The graph is flagged as locally indexed, allocated, and
// STORAGE_1D_PACKED by the initializer list.
//
// NOTE(review): original source lines 530 (presumably the
// "CrsGraph<...>::" qualifier) and 553 are absent from this listing.
529  template <class LocalOrdinal, class GlobalOrdinal, class Node>
531  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
532  const Teuchos::RCP<const map_type>& colMap,
533  const typename local_graph_type::row_map_type& rowPointers,
534  const typename local_graph_type::entries_type::non_const_type& columnIndices,
535  const Teuchos::RCP<Teuchos::ParameterList>& params) :
536  dist_object_type (rowMap)
537  , rowMap_(rowMap)
538  , colMap_(colMap)
539  , numAllocForAllRows_(0)
540  , storageStatus_(Details::STORAGE_1D_PACKED)
541  , indicesAreAllocated_(true)
542  , indicesAreLocal_(true)
543  {
544  staticAssertions ();
545  if (! params.is_null() && params->isParameter("sorted") &&
546  ! params->get<bool>("sorted")) {
547  indicesAreSorted_ = false;
548  }
549  else {
550  indicesAreSorted_ = true;
551  }
552  setAllIndices (rowPointers, columnIndices);
554  }
555 
// Constructor: row Map, column Map, and ready-made local structure
// given as Teuchos::ArrayRCP arrays (row offsets and local column
// indices).
//
// rowMap, colMap: stored in rowMap_ / colMap_.
// rowPointers:    row offsets, handed to setAllIndices().
// columnIndices:  column indices, handed to setAllIndices().
// params:         optional; if it holds a bool "sorted" entry that is
//                 false, indicesAreSorted_ is cleared.  In every other
//                 case (null list, no entry, entry true) the indices
//                 are flagged as sorted.
//
// The graph is flagged as locally indexed, allocated, and
// STORAGE_1D_PACKED by the initializer list.
//
// NOTE(review): original source lines 557 (presumably the
// "CrsGraph<...>::" qualifier) and 580 are absent from this listing.
556  template <class LocalOrdinal, class GlobalOrdinal, class Node>
558  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
559  const Teuchos::RCP<const map_type>& colMap,
560  const Teuchos::ArrayRCP<size_t>& rowPointers,
561  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
562  const Teuchos::RCP<Teuchos::ParameterList>& params) :
563  dist_object_type (rowMap)
564  , rowMap_ (rowMap)
565  , colMap_ (colMap)
566  , numAllocForAllRows_ (0)
567  , storageStatus_ (Details::STORAGE_1D_PACKED)
568  , indicesAreAllocated_ (true)
569  , indicesAreLocal_ (true)
570  {
571  staticAssertions ();
572  if (! params.is_null() && params->isParameter("sorted") &&
573  ! params->get<bool>("sorted")) {
574  indicesAreSorted_ = false;
575  }
576  else {
577  indicesAreSorted_ = true;
578  }
579  setAllIndices (rowPointers, columnIndices);
581  }
582 
// Constructor: row Map, column Map, and a local graph.  Delegates to
// the (local graph, rowMap, colMap, domainMap, rangeMap, params)
// constructor, passing Teuchos::null for both the domain and the
// range Map.
//
// NOTE(review): original source line 584 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
583  template <class LocalOrdinal, class GlobalOrdinal, class Node>
585  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
586  const Teuchos::RCP<const map_type>& colMap,
587  const local_graph_type& k_local_graph_,
588  const Teuchos::RCP<Teuchos::ParameterList>& params)
589  : CrsGraph (k_local_graph_,
590  rowMap,
591  colMap,
592  Teuchos::null,
593  Teuchos::null,
594  params)
595  {}
596 
// Constructor: local graph plus row, column, domain, and range Maps.
// Leaves the graph fill complete.
//
// k_local_graph_: local graph; stored in lclGraph_, and its entries /
//                 row_map Views are also assigned to k_lclInds1D_ /
//                 k_rowPtrs_.
// rowMap:         row Map; its local element count must equal
//                 k_local_graph_.numRows().
// colMap:         column Map; must be nonnull.
// domainMap:      domain Map; rowMap_ is used when null.
// rangeMap:       range Map; rowMap_ is used when null.
// params:         optional.  A bool "sorted" entry that is false clears
//                 indicesAreSorted_; computeGlobalConstants() is
//                 skipped only when params is nonnull and
//                 params->get("compute global constants", true) yields
//                 false.
//
// Raises std::runtime_error for a null column Map or a row-count
// mismatch, and std::logic_error if the 1-D index Views are already
// allocated on entry.
//
// NOTE(review): rowMap is dereferenced without a null check here,
// whereas the allocation-hint constructors test rowMap.is_null()
// first -- confirm that a null row Map cannot reach this point.
// NOTE(review): original source line 598 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
597  template <class LocalOrdinal, class GlobalOrdinal, class Node>
599  CrsGraph (const local_graph_type& k_local_graph_,
600  const Teuchos::RCP<const map_type>& rowMap,
601  const Teuchos::RCP<const map_type>& colMap,
602  const Teuchos::RCP<const map_type>& domainMap,
603  const Teuchos::RCP<const map_type>& rangeMap,
604  const Teuchos::RCP<Teuchos::ParameterList>& params)
605  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
606  , rowMap_ (rowMap)
607  , colMap_ (colMap)
608  , lclGraph_ (k_local_graph_)
609  , numAllocForAllRows_ (0)
610  , storageStatus_ (Details::STORAGE_1D_PACKED)
611  , indicesAreAllocated_ (true)
612  , indicesAreLocal_ (true)
613  {
614  staticAssertions();
  // This prefix has no trailing ": "; each message below starts with
  // ": " instead.
615  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
616 
617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
618  colMap.is_null (), std::runtime_error,
619  ": The input column Map must be nonnull.");
620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
622  std::runtime_error,
623  ": The input row Map and the input local graph need to have the same "
624  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
625  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
626  << " row(s).");
627 
628  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
629  // rowMap_->getNodeNumElements(), but it doesn't have to.
630  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
632  // ": The input row Map and the input local graph need to have the same "
633  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
634  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
636  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
637  ": cannot have 1D data structures allocated.");
638 
639  if(! params.is_null() && params->isParameter("sorted") &&
640  ! params->get<bool>("sorted")) {
641  indicesAreSorted_ = false;
642  }
643  else {
644  indicesAreSorted_ = true;
645  }
646 
647  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
648  rangeMap .is_null() ? rowMap_ : rangeMap);
649  Teuchos::Array<int> remotePIDs (0); // unused output argument
650  this->makeImportExport (remotePIDs, false);
651 
  // Alias the local graph's Views as the graph's 1-D storage.
652  k_lclInds1D_ = lclGraph_.entries;
653  k_rowPtrs_ = lclGraph_.row_map;
654 
655  const bool callComputeGlobalConstants = params.get () == nullptr ||
656  params->get ("compute global constants", true);
657 
658  if (callComputeGlobalConstants) {
659  this->computeGlobalConstants ();
660  }
661  this->fillComplete_ = true;
662  this->checkInternalState ();
663  }
664 
// Constructor: local graph plus all four Maps and precomputed Import
// and Export objects.  Leaves the graph fill complete.
//
// lclGraph:   local graph; stored in lclGraph_, and its entries /
//             row_map Views are also assigned to k_lclInds1D_ /
//             k_rowPtrs_.
// rowMap:     row Map.
// colMap:     column Map; must be nonnull.
// domainMap:  domain Map; rowMap is used when null.
// rangeMap:   range Map; rowMap is used when null.
// importer:   stored in importer_ as given.
// exporter:   stored in exporter_ as given.
// params:     optional.  A bool "sorted" entry that is false clears
//             indicesAreSorted_; computeGlobalConstants() is skipped
//             only when params is nonnull and
//             params->get("compute global constants", true) yields
//             false.
//
// Raises std::runtime_error for a null column Map.
//
// NOTE(review): original source lines 666 (presumably the
// "CrsGraph<...>::" qualifier) and 714 are absent from this listing.
665  template <class LocalOrdinal, class GlobalOrdinal, class Node>
667  CrsGraph (const local_graph_type& lclGraph,
668  const Teuchos::RCP<const map_type>& rowMap,
669  const Teuchos::RCP<const map_type>& colMap,
670  const Teuchos::RCP<const map_type>& domainMap,
671  const Teuchos::RCP<const map_type>& rangeMap,
672  const Teuchos::RCP<const import_type>& importer,
673  const Teuchos::RCP<const export_type>& exporter,
674  const Teuchos::RCP<Teuchos::ParameterList>& params) :
675  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
676  rowMap_ (rowMap),
677  colMap_ (colMap),
678  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
679  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
680  importer_ (importer),
681  exporter_ (exporter),
682  lclGraph_ (lclGraph),
683  numAllocForAllRows_ (0),
684  storageStatus_ (Details::STORAGE_1D_PACKED),
685  indicesAreAllocated_ (true),
686  indicesAreLocal_ (true)
687  {
688  staticAssertions();
689  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_type,"
690  "Map,Map,Map,Map,Import,Export,params): ";
691 
692  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
693  (colMap.is_null (), std::runtime_error,
694  "The input column Map must be nonnull.");
695 
  // Alias the local graph's Views as the graph's 1-D storage.
696  k_lclInds1D_ = lclGraph_.entries;
697  k_rowPtrs_ = lclGraph_.row_map;
698 
699  if (! params.is_null() && params->isParameter("sorted") &&
700  ! params->get<bool>("sorted")) {
701  indicesAreSorted_ = false;
702  }
703  else {
704  indicesAreSorted_ = true;
705  }
706 
707  const bool callComputeGlobalConstants =
708  params.get () == nullptr ||
709  params->get ("compute global constants", true);
710  if (callComputeGlobalConstants) {
711  this->computeGlobalConstants ();
712  }
713  fillComplete_ = true;
715  }
716 
// Build and return a "Tpetra::CrsGraph" ParameterList containing an
// "Import" and an "Export" sublist, both copied from a list that a
// temporary Distributor was constructed with.
//
// NOTE(review): the function-name lines (original source lines
// 719-720) are absent from this listing; setParameterList calls this
// method as getValidParameters().
// NOTE(review): rowMap_ is dereferenced unconditionally -- confirm it
// cannot be null when this is called.
717  template <class LocalOrdinal, class GlobalOrdinal, class Node>
718  Teuchos::RCP<const Teuchos::ParameterList>
721  {
722  using Teuchos::RCP;
723  using Teuchos::ParameterList;
724  using Teuchos::parameterList;
725 
726  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
727 
728  // Make a sublist for the Import.
729  RCP<ParameterList> importSublist = parameterList ("Import");
730 
731  // FIXME (mfh 02 Apr 2012) We should really have the Import and
732  // Export objects fill in these lists. However, we don't want to
733  // create an Import or Export unless we need them. For now, we
734  // know that the Import and Export just pass the list directly to
735  // their Distributor, so we can create a Distributor here
736  // (Distributor's constructor is a lightweight operation) and have
737  // it fill in the list.
738 
739  // Fill in Distributor default parameters by creating a
740  // Distributor and asking it to do the work.
741  Distributor distributor (rowMap_->getComm (), importSublist);
742  params->set ("Import", *importSublist, "How the Import performs communication.");
743 
744  // Make a sublist for the Export. For now, it's a clone of the
745  // Import sublist. It's not a shallow copy, though, since we
746  // might like the Import to do communication differently than the
747  // Export.
748  params->set ("Export", *importSublist, "How the Export performs communication.");
749 
750  return params;
751  }
752 
// Validate `params` against getValidParameters(), filling in defaults
// in place, then store it via setMyParamList().
//
// NOTE(review): params is dereferenced without a null check -- confirm
// callers never pass a null RCP.
// NOTE(review): original source line 755 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
753  template <class LocalOrdinal, class GlobalOrdinal, class Node>
754  void
756  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
757  {
758  Teuchos::RCP<const Teuchos::ParameterList> validParams =
759  getValidParameters ();
760  params->validateParametersAndSetDefaults (*validParams);
761  this->setMyParamList (params);
762  }
763 
// Returns rowMap_->getGlobalNumElements().
//
// NOTE(review): the return-type and function-name lines (original
// source lines 765-767) are absent from this listing; presumably this
// is getGlobalNumRows() -- confirm against the real file.
764  template <class LocalOrdinal, class GlobalOrdinal, class Node>
768  {
769  return rowMap_->getGlobalNumElements ();
770  }
771 
// Returns the global element count of the domain Map.
//
// Raises std::runtime_error if the graph is not fill complete or the
// domain Map is null.
//
// NOTE(review): the return-type and function-name lines (original
// source lines 773-775) are absent from this listing; the name is
// taken from tfecfFuncName.
772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
776  {
777  const char tfecfFuncName[] = "getGlobalNumCols: ";
778  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
779  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
780  "The graph does not have a domain Map. You may not call this method in "
781  "that case.");
782  return getDomainMap ()->getGlobalNumElements ();
783  }
784 
// Returns the local element count of the row Map, or zero when the
// row Map is null.
//
// NOTE(review): the function-name lines (original source lines
// 787-788) are absent from this listing; presumably this is
// getNodeNumRows(), which another comment in this file describes as
// returning rowMap_->getNodeNumElements().
785  template <class LocalOrdinal, class GlobalOrdinal, class Node>
786  size_t
789  {
790  return this->rowMap_.is_null () ?
791  static_cast<size_t> (0) :
792  this->rowMap_->getNodeNumElements ();
793  }
794 
// Returns the local element count of the column Map.
//
// Raises std::runtime_error if hasColMap() is false.
//
// NOTE(review): hasColMap() is defined as !colMap_.is_null(), so the
// null branch of the return expression looks unreachable after the
// check above -- confirm.
// NOTE(review): the function-name lines (original source lines
// 797-798) are absent from this listing; the name is taken from
// tfecfFuncName.
795  template <class LocalOrdinal, class GlobalOrdinal, class Node>
796  size_t
799  {
800  const char tfecfFuncName[] = "getNodeNumCols: ";
801  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
802  ! hasColMap (), std::runtime_error,
803  "The graph does not have a column Map. You may not call this method "
804  "unless the graph has a column Map. This requires either that a custom "
805  "column Map was given to the constructor, or that fillComplete() has "
806  "been called.");
807  return colMap_.is_null () ? static_cast<size_t> (0) :
808  colMap_->getNodeNumElements ();
809  }
810 
811 
812 
// Returns the stored row Map (rowMap_); may be null.
//
// NOTE(review): original source line 815 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
816  getRowMap () const
817  {
818  return rowMap_;
819  }
820 
// Returns the stored column Map (colMap_); may be null.
//
// NOTE(review): original source line 823 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
822  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
824  getColMap () const
825  {
826  return colMap_;
827  }
828 
// Returns the stored domain Map (domainMap_).
//
// NOTE(review): the function-name lines (original source lines
// 831-832) are absent from this listing; presumably this is
// getDomainMap() -- confirm against the real file.
829  template <class LocalOrdinal, class GlobalOrdinal, class Node>
830  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
833  {
834  return domainMap_;
835  }
836 
// Returns the stored range Map (rangeMap_).
//
// NOTE(review): original source line 839 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
837  template <class LocalOrdinal, class GlobalOrdinal, class Node>
838  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
840  getRangeMap () const
841  {
842  return rangeMap_;
843  }
844 
// Returns the stored Import object (importer_); may be null.
//
// NOTE(review): original source line 847 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
845  template <class LocalOrdinal, class GlobalOrdinal, class Node>
846  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
848  getImporter () const
849  {
850  return importer_;
851  }
852 
// Returns the stored Export object (exporter_); may be null.
//
// NOTE(review): original source line 855 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
853  template <class LocalOrdinal, class GlobalOrdinal, class Node>
854  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
856  getExporter () const
857  {
858  return exporter_;
859  }
860 
// Returns true exactly when the column Map (colMap_) is nonnull.
//
// NOTE(review): original source line 863 (presumably the
// "CrsGraph<...>::" qualifier) is absent from this listing.
861  template <class LocalOrdinal, class GlobalOrdinal, class Node>
862  bool
864  hasColMap () const
865  {
866  return ! colMap_.is_null ();
867  }
868 
// Returns true when indices are allocated, k_numRowEntries_ is empty,
// and the graph has at least one local row.
//
// NOTE(review): the function-name lines (original source lines
// 871-872) are absent from this listing; presumably this is
// isStorageOptimized() -- confirm against the real file.
869  template <class LocalOrdinal, class GlobalOrdinal, class Node>
870  bool
873  {
874  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
875  // getNodeNumRows() is zero?
876 
877  const bool isOpt = indicesAreAllocated_ &&
878  k_numRowEntries_.extent (0) == 0 &&
879  getNodeNumRows () > 0;
880 
881  return isOpt;
882  }
883 
// Always returns StaticProfile.
//
// NOTE(review): the return-type and function-name lines (original
// source lines 885-887) are absent from this listing; presumably this
// is getProfileType() -- confirm against the real file.
884  template <class LocalOrdinal, class GlobalOrdinal, class Node>
888  {
889  return StaticProfile;
890  }
891 
// Returns the cached global entry count (globalNumEntries_).
//
// Raises std::logic_error if haveGlobalConstants_ is false.
//
// NOTE(review): the return-type and function-name lines (original
// source lines 893-895) are absent from this listing; the name is
// taken from tfecfFuncName.
892  template <class LocalOrdinal, class GlobalOrdinal, class Node>
896  {
897  const char tfecfFuncName[] = "getGlobalNumEntries: ";
898  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
899  (! this->haveGlobalConstants_, std::logic_error,
900  "The graph does not have global constants computed, "
901  "but the user has requested them.");
902 
903  return globalNumEntries_;
904  }
905 
// Returns the number of entries stored on this process.
//
// - Nothing allocated, or zero local rows: returns 0.
// - k_numRowEntries_ empty: returns the last row offset,
//   lclGraph_.row_map(lclNumRows), or 0 if row_map has fewer than
//   lclNumRows + 1 entries.
// - Otherwise: returns the sum of k_numRowEntries_ over the first
//   min(lclNumRows, k_numRowEntries_.extent(0)) rows.
//
// NOTE(review): the function-name lines (original source lines
// 908-909) are absent from this listing; presumably this is
// getNodeNumEntries() -- confirm against the real file.
// NOTE(review): row and View extents are narrowed to LocalOrdinal
// here -- presumably safe because local counts fit in LO; confirm.
906  template <class LocalOrdinal, class GlobalOrdinal, class Node>
907  size_t
910  {
911  typedef LocalOrdinal LO;
912 
913  if (this->indicesAreAllocated_) {
914  const LO lclNumRows = this->getNodeNumRows ();
915  if (lclNumRows == 0) {
916  return static_cast<size_t> (0);
917  }
918  else {
919  // Avoid the "*this capture" issue by creating a local Kokkos::View.
920  auto numEntPerRow = this->k_numRowEntries_;
921  const LO numNumEntPerRow = numEntPerRow.extent (0);
922  if (numNumEntPerRow == 0) {
923  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
924  static_cast<LO> (lclNumRows + 1)) {
925  return static_cast<size_t> (0);
926  }
927  else {
928  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
929  }
930  }
931  else { // k_numRowEntries_ is populated
932  // k_numRowEntries_ is actually a host View, so we run
933  // the sum in its native execution space. This also means
934  // that we can use explicit capture (which could perhaps
935  // improve build time) instead of KOKKOS_LAMBDA, and avoid
936  // any CUDA build issues with trying to run a __device__ -
937  // only function on host.
938  typedef typename num_row_entries_type::execution_space
939  host_exec_space;
940  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
941 
942  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
943  lclNumRows :
944  numNumEntPerRow;
945  size_t nodeNumEnt = 0;
946  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
947  range_type (0, upperLoopBound),
948  [=] (const LO& k, size_t& lclSum) {
949  lclSum += numEntPerRow(k);
950  }, nodeNumEnt);
951  return nodeNumEnt;
952  }
953  }
954  }
955  else { // nothing allocated on this process, so no entries
956  return static_cast<size_t> (0);
957  }
958  }
959 
// Returns the cached value globalMaxNumRowEntries_.
// Raises std::logic_error if haveGlobalConstants_ is false, that is, if the
// global constants have not been computed.
// NOTE(review): listing lines 961-963 (return type and qualified name) are
// absent from this excerpt; the name is taken from tfecfFuncName below.
960  template <class LocalOrdinal, class GlobalOrdinal, class Node>
964  {
965  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
966  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
967  (! this->haveGlobalConstants_, std::logic_error,
968  "The graph does not have global constants computed, "
969  "but the user has requested them.");
970 
971  return globalMaxNumRowEntries_;
972  }
973 
// Returns the cached value nodeMaxNumRowEntries_ without any validity check.
// NOTE(review): listing lines 976-977 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
974  template <class LocalOrdinal, class GlobalOrdinal, class Node>
975  size_t
978  {
979  return nodeMaxNumRowEntries_;
980  }
981 
// Returns the fillComplete_ flag.
// NOTE(review): listing lines 984-985 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
982  template <class LocalOrdinal, class GlobalOrdinal, class Node>
983  bool
986  {
987  return fillComplete_;
988  }
989 
// Returns the logical negation of the fillComplete_ flag.
// NOTE(review): listing lines 992-993 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
990  template <class LocalOrdinal, class GlobalOrdinal, class Node>
991  bool
994  {
995  return ! fillComplete_;
996  }
997 
998 
// Returns the indicesAreLocal_ flag.
// NOTE(review): listing lines 1001-1002 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
999  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1000  bool
1003  {
1004  return indicesAreLocal_;
1005  }
1006 
// Returns the indicesAreGlobal_ flag.
// NOTE(review): listing lines 1009-1010 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
1007  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1008  bool
1011  {
1012  return indicesAreGlobal_;
1013  }
1014 
// Reports the total number of index slots allocated on the calling process.
//
// Result, by case:
//   - indices not allocated: Tpetra::Details::OrdinalTraits<size_t>::invalid().
//   - zero local rows: 0.
//   - STORAGE_1D_PACKED: lclGraph_.row_map(lclNumRows), or 0 if row_map has
//     fewer than lclNumRows + 1 entries.
//   - STORAGE_1D_UNPACKED: k_rowPtrs_(lclNumRows), or 0 if k_rowPtrs_ is empty.
//   - any other storage status: 0.
//
// NOTE(review): listing lines 1017-1018 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
1015  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1016  size_t
1019  {
1020  typedef LocalOrdinal LO;
1021 
1022  if (this->indicesAreAllocated_) {
1023  const LO lclNumRows = this->getNodeNumRows ();
1024  if (lclNumRows == 0) {
1025  return static_cast<size_t> (0);
1026  }
1027  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
// Packed storage: read the final offset from the local graph, guarding
// against a row_map too short to contain index lclNumRows.
1028  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1029  static_cast<LO> (lclNumRows + 1)) {
1030  return static_cast<size_t> (0);
1031  }
1032  else {
1033  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1034  }
1035  }
1036  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
// Unpacked storage: read the final offset from k_rowPtrs_ instead.
1037  if (this->k_rowPtrs_.extent (0) == 0) {
1038  return static_cast<size_t> (0);
1039  }
1040  else {
1041  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1042  }
1043  }
1044  else {
1045  return static_cast<size_t> (0);
1046  }
1047  }
1048  else {
1049  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1050  }
1051  }
1052 
// Returns the communicator of the row Map, or Teuchos::null when rowMap_
// is itself null.
// NOTE(review): listing line 1055 (the class qualifier) is absent from this
// excerpt.
1053  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1054  Teuchos::RCP<const Teuchos::Comm<int> >
1056  getComm () const
1057  {
1058  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1059  }
1060 
// Returns the index base reported by the row Map.
// NOTE(review): rowMap_ is dereferenced here without a null check -- confirm
// that callers only reach this with a non-null row Map.
// NOTE(review): listing lines 1063-1064 (qualified function name) are absent
// from this excerpt -- confirm the name against the class declaration.
1061  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1062  GlobalOrdinal
1065  {
1066  return rowMap_->getIndexBase ();
1067  }
1068 
// Returns the indicesAreAllocated_ flag.
// NOTE(review): listing line 1071 (the class qualifier) is absent from this
// excerpt.
1069  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1070  bool
1072  indicesAreAllocated () const
1073  {
1074  return indicesAreAllocated_;
1075  }
1076 
// Returns the indicesAreSorted_ flag.
// NOTE(review): listing line 1079 (the class qualifier) is absent from this
// excerpt.
1077  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1078  bool
1080  isSorted () const
1081  {
1082  return indicesAreSorted_;
1083  }
1084 
// Returns the noRedundancies_ flag.
// NOTE(review): listing line 1087 (the class qualifier) is absent from this
// excerpt.
1085  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1086  bool
1088  isMerged () const
1089  {
1090  return noRedundancies_;
1091  }
1092 
// Records that the graph was modified locally: clears indicesAreSorted_,
// noRedundancies_, and haveLocalConstants_.
// NOTE(review): listing lines 1095-1096 (qualified function name) are absent
// from this excerpt; presumably this is setLocallyModified -- TODO confirm.
1093  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1094  void
1097  {
1098  // FIXME (mfh 07 May 2013) How do we know that the change
1099  // introduced a redundancy, or even that it invalidated the sorted
1100  // order of indices? CrsGraph has always made this conservative
1101  // guess. It could be a bit costly to check at insertion time,
1102  // though.
1103  indicesAreSorted_ = false;
1104  noRedundancies_ = false;
1105 
1106  // We've modified the graph, so we'll have to recompute local
1107  // constants like the number of diagonal entries on this process.
1108  haveLocalConstants_ = false;
1109  }
1110 
// Allocates the graph's row offsets and column-index storage.
//
// Parameters:
//   lg      - LocalIndices allocates k_lclInds1D_; otherwise k_gblInds1D_
//             is allocated.
//   verbose - when true, progress messages are written to std::cerr.
//
// Steps, in order:
//   1. Validate the request (std::logic_error on mismatch with the current
//      indexing state, or if indices are already allocated).
//   2. Build numRows + 1 row offsets, either from k_numAllocPerRow_ (if it
//      is nonempty) or from the constant numAllocForAllRows_.
//   3. Allocate the column-index View sized by the last row offset.
//   4. Set storageStatus_ to STORAGE_1D_UNPACKED and update the
//      indicesAreLocal_ / indicesAreGlobal_ flags.
//   5. If numRows > 0, allocate a zero-filled k_numRowEntries_.
//   6. Release the allocation hints, set indicesAreAllocated_, and run
//      checkInternalState(), rethrowing any failure with added context.
//
// NOTE(review): listing lines 1113, 1116, 1185 and 1205 are absent from this
// excerpt (the class qualifier and, presumably, using-declarations for
// ProfilingRegion and the computeOffsets* helpers -- TODO confirm).
1111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1112  void
1114  allocateIndices (const ELocalGlobal lg, const bool verbose)
1115  {
1117  using Teuchos::arcp;
1118  using Teuchos::Array;
1119  using Teuchos::ArrayRCP;
1120  using std::endl;
1121  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1122  typedef typename local_graph_type::row_map_type::non_const_type
1123  non_const_row_map_type;
1124  typedef typename local_graph_type::entries_type::non_const_type
1125  lcl_col_inds_type;
1126  typedef Kokkos::View<GlobalOrdinal*,
1127  typename lcl_col_inds_type::array_layout,
1128  device_type> gbl_col_inds_type;
1129  const char tfecfFuncName[] = "allocateIndices: ";
1130  const char suffix[] =
1131  " Please report this bug to the Tpetra developers.";
1132  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1133 
// prefix stays null unless verbose is set; every later dereference of it
// is guarded by the same verbose check.
1134  std::unique_ptr<std::string> prefix;
1135  if (verbose) {
1136  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1137  std::ostringstream os;
1138  os << *prefix << "Start: lg="
1139  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1140  << ", numRows: " << this->getNodeNumRows() << endl;
1141  std::cerr << os.str();
1142  }
1143 
1144  // This is a protected function, only callable by us. If it was
1145  // called incorrectly, it is our fault. That's why the tests
1146  // below throw std::logic_error instead of std::invalid_argument.
1147  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1148  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1149  ": The graph is locally indexed, but Tpetra code is calling "
1150  "this method with lg=GlobalIndices." << suffix);
1151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1152  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1153  ": The graph is globally indexed, but Tpetra code is calling "
1154  "this method with lg=LocalIndices." << suffix);
1155  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1156  (indicesAreAllocated (), std::logic_error, ": The graph's "
1157  "indices are already allocated, but Tpetra is calling "
1158  "allocateIndices again." << suffix);
1159  const size_t numRows = this->getNodeNumRows ();
1160 
1161  //
1162  // STATIC ALLOCATION PROFILE
1163  //
1164  if (verbose) {
1165  std::ostringstream os;
1166  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1167  std::cerr << os.str();
1168  }
// Build the offsets in a local View first; it is assigned to the member
// only after the offsets have been computed successfully.
1169  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1170 
1171  if (this->k_numAllocPerRow_.extent (0) != 0) {
1172  // It's OK to throw std::invalid_argument here, because we
1173  // haven't incurred any side effects yet. Throwing that
1174  // exception (and not, say, std::logic_error) implies that the
1175  // instance can recover.
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (this->k_numAllocPerRow_.extent (0) != numRows,
1178  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1179  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1180  << ", but its length != numRows = " << numRows << ".");
1181 
1182  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1183  // we want to compute here) lives on device. That's OK;
1184  // computeOffsetsFromCounts can handle this case.
1186 
1187  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1188  // doesn't attempt to check its input for "invalid" flag
1189  // values. For now, we omit that feature of the sequential
1190  // code disabled below.
1191  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1192  }
1193  else {
1194  // It's OK to throw std::invalid_argument here, because we
1195  // haven't incurred any side effects yet. Throwing that
1196  // exception (and not, say, std::logic_error) implies that the
1197  // instance can recover.
1198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1199  (this->numAllocForAllRows_ ==
1200  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1201  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1202  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1203  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1204 
1206  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1207  }
1208 
1209  // "Commit" the resulting row offsets.
1210  this->k_rowPtrs_ = k_rowPtrs;
1211 
// The last row offset is the total number of index slots to allocate.
1212  const size_type numInds =
1213  Details::getEntryOnHost(this->k_rowPtrs_, numRows);
1214  if (lg == LocalIndices) {
1215  if (verbose) {
1216  std::ostringstream os;
1217  os << *prefix << "Allocate local column indices "
1218  "k_lclInds1D_: " << numInds << endl;
1219  std::cerr << os.str();
1220  }
1221  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1222  }
1223  else {
1224  if (verbose) {
1225  std::ostringstream os;
1226  os << *prefix << "Allocate global column indices "
1227  "k_gblInds1D_: " << numInds << endl;
1228  std::cerr << os.str();
1229  }
1230  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1231  }
1232  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1233 
1234  this->indicesAreLocal_ = (lg == LocalIndices);
1235  this->indicesAreGlobal_ = (lg == GlobalIndices);
1236 
1237  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1238  using Kokkos::ViewAllocateWithoutInitializing;
1239  typedef decltype (k_numRowEntries_) row_ent_type;
1240  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1241  if (verbose) {
1242  std::ostringstream os;
1243  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1244  << endl;
1245  std::cerr << os.str();
1246  }
// Allocate without initializing, then zero-fill explicitly via deep_copy.
1247  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1248  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1249  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1250  }
1251 
1252  // Once indices are allocated, CrsGraph needs to free this information.
1253  this->numAllocForAllRows_ = 0;
1254  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1255  this->indicesAreAllocated_ = true;
1256 
// Validate the new state. Any exception is rethrown with a message that
// identifies allocateIndices as the point of failure.
1257  try {
1258  this->checkInternalState ();
1259  }
1260  catch (std::logic_error& e) {
1261  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1262  (true, std::logic_error, "At end of allocateIndices, "
1263  "checkInternalState threw std::logic_error: "
1264  << e.what ());
1265  }
1266  catch (std::exception& e) {
1267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1268  (true, std::runtime_error, "At end of allocateIndices, "
1269  "checkInternalState threw std::exception: "
1270  << e.what ());
1271  }
1272  catch (...) {
1273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1274  (true, std::runtime_error, "At end of allocateIndices, "
1275  "checkInternalState threw an exception "
1276  "not a subclass of std::exception.");
1277  }
1278 
1279  if (verbose) {
1280  std::ostringstream os;
1281  os << *prefix << "Done" << endl;
1282  std::cerr << os.str();
1283  }
1284  }
1285 
// Returns a read-only Teuchos::ArrayView over the allocated local-index
// slots of one row: rowinfo.allocSize entries starting at rowinfo.offset1D
// in k_lclInds1D_. Returns an empty view if the row has no allocation or if
// k_lclInds1D_ is empty.
// NOTE(review): listing line 1288 (the class qualifier) is absent from this
// excerpt.
1286  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1287  Teuchos::ArrayView<const LocalOrdinal>
1289  getLocalView (const RowInfo& rowinfo) const
1290  {
1291  using Kokkos::subview;
1292  typedef LocalOrdinal LO;
1293  typedef Kokkos::View<const LO*, execution_space,
1294  Kokkos::MemoryUnmanaged> row_view_type;
1295 
1296  if (rowinfo.allocSize == 0) { // nothing in the row to view
1297  return Teuchos::ArrayView<const LO> ();
1298  }
1299  else { // the row has allocated slots
1300  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1301  const size_t start = rowinfo.offset1D;
1302  const size_t len = rowinfo.allocSize;
1303  const std::pair<size_t, size_t> rng (start, start + len);
1304  // mfh 23 Nov 2015: Don't just create a subview of
1305  // k_lclInds1D_ directly, because that first creates a
1306  // _managed_ subview, then returns an unmanaged version of
1307  // that. That touches the reference count, which costs
1308  // performance in a measurable way.
1309  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1310  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1311  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1312  }
1313  else {
1314  return Teuchos::ArrayView<const LO> (); // k_lclInds1D_ is empty; nothing to view
1315  }
1316  }
1317  }
1318 
// Raw-pointer variant of the local row view.
//
// Outputs:
//   lclInds  - set to k_lclInds1D_.data() + rowInfo.offset1D, or nullptr if
//              the row has no allocation or k_lclInds1D_ is empty.
//   capacity - set to rowInfo.allocSize, or 0 in the nullptr case.
//
// Returns 0 on success. When debug_ is set and the requested range
// [offset1D, offset1D + allocSize) runs past the end of k_lclInds1D_,
// returns -1 and leaves the outputs at nullptr / 0.
// NOTE(review): listing line 1321 (the class qualifier) is absent from this
// excerpt.
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  LocalOrdinal
1322  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1323  LocalOrdinal& capacity,
1324  const RowInfo& rowInfo) const
1325  {
1326  lclInds = nullptr;
1327  capacity = 0;
1328 
1329  if (rowInfo.allocSize != 0 && k_lclInds1D_.extent (0) != 0) {
1330  if (debug_) {
// The bounds check is performed only in debug mode.
1331  if (rowInfo.offset1D + rowInfo.allocSize >
1332  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1333  return static_cast<LocalOrdinal> (-1);
1334  }
1335  }
1336  lclInds = k_lclInds1D_.data () + rowInfo.offset1D;
1337  capacity = rowInfo.allocSize;
1338  }
1339  return static_cast<LocalOrdinal> (0);
1340  }
1341 
// Returns a writable Teuchos::ArrayView over the allocated local-index
// slots of one row: rowinfo.allocSize entries starting at rowinfo.offset1D
// in k_lclInds1D_. Returns an empty view if the row has no allocation or if
// k_lclInds1D_ is empty.
// NOTE(review): listing lines 1344-1345 (qualified function name and
// parameter list) are absent from this excerpt; the body reads a parameter
// named rowinfo.
1342  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1343  Teuchos::ArrayView<LocalOrdinal>
1346  {
1347  using Kokkos::subview;
1348  typedef LocalOrdinal LO;
1349  typedef Kokkos::View<LO*, execution_space,
1350  Kokkos::MemoryUnmanaged> row_view_type;
1351 
1352  if (rowinfo.allocSize == 0) { // nothing in the row to view
1353  return Teuchos::ArrayView<LO> ();
1354  }
1355  else {
1356  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1357  const size_t start = rowinfo.offset1D;
1358  const size_t len = rowinfo.allocSize;
1359  const std::pair<size_t, size_t> rng (start, start + len);
1360  // mfh 23 Nov 2015: Don't just create a subview of
1361  // k_lclInds1D_ directly, because that first creates a
1362  // _managed_ subview, then returns an unmanaged version of
1363  // that. That touches the reference count, which costs
1364  // performance in a measurable way.
1365  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1366  LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1367  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1368  }
1369  else {
1370  return Teuchos::ArrayView<LO> (); // k_lclInds1D_ is empty; nothing to view
1371  }
1372  }
1373  }
1374 
1375 
// Returns a read-only unmanaged Kokkos::View over the allocated local-index
// slots of one row: rowInfo.allocSize entries starting at rowInfo.offset1D
// in k_lclInds1D_. Returns a default-constructed view if the row has no
// allocation or if k_lclInds1D_ is empty.
// NOTE(review): listing lines 1378 and 1380 (part of the return type and the
// class qualifier) are absent from this excerpt.
1376  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1377  Kokkos::View<const LocalOrdinal*,
1379  Kokkos::MemoryUnmanaged>
1381  getLocalKokkosRowView (const RowInfo& rowInfo) const
1382  {
1383  typedef LocalOrdinal LO;
1384  typedef Kokkos::View<const LO*, execution_space,
1385  Kokkos::MemoryUnmanaged> row_view_type;
1386 
1387  if (rowInfo.allocSize == 0) { // nothing in the row to view
1388  return row_view_type ();
1389  }
1390  else { // the row has allocated slots
1391  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1392  const size_t start = rowInfo.offset1D;
1393  const size_t len = rowInfo.allocSize;
1394  const std::pair<size_t, size_t> rng (start, start + len);
1395  // mfh 23 Nov 2015: Don't just create a subview of
1396  // k_lclInds1D_ directly, because that first creates a
1397  // _managed_ subview, then returns an unmanaged version of
1398  // that. That touches the reference count, which costs
1399  // performance in a measurable way.
1400  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1401  }
1402  else {
1403  return row_view_type (); // k_lclInds1D_ is empty; nothing to view
1404  }
1405  }
1406  }
1407 
1408 
// Returns a writable unmanaged Kokkos::View over the allocated local-index
// slots of one row: rowInfo.allocSize entries starting at rowInfo.offset1D
// in k_lclInds1D_. Returns a default-constructed view if the row has no
// allocation or if k_lclInds1D_ is empty.
1409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1410  Kokkos::View<LocalOrdinal*,
1411  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1412  Kokkos::MemoryUnmanaged>
1413  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1414  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1415  {
1416  using row_view_type = Kokkos::View<LocalOrdinal*,
1417  execution_space, Kokkos::MemoryUnmanaged>;
1418 
1419  if (rowInfo.allocSize == 0) { // nothing in the row to view
1420  return row_view_type ();
1421  }
1422  else { // the row has allocated slots
1423  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1424  const size_t start = rowInfo.offset1D;
1425  const size_t len = rowInfo.allocSize;
1426  const std::pair<size_t, size_t> rng (start, start + len);
1427  // mfh 23 Nov 2015: Don't just create a subview of
1428  // k_lclInds1D_ directly, because that first creates a
1429  // _managed_ subview, then returns an unmanaged version of
1430  // that. That touches the reference count, which costs
1431  // performance in a measurable way.
1432  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1433  }
1434  else {
1435  return row_view_type (); // k_lclInds1D_ is empty; nothing to view
1436  }
1437  }
1438  }
1439 
1440 
// Returns a read-only unmanaged Kokkos::View over the allocated global-index
// slots of one row: rowinfo.allocSize entries starting at rowinfo.offset1D
// in k_gblInds1D_. Returns a default-constructed view if the row has no
// allocation or if k_gblInds1D_ is empty.
1441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1442  Kokkos::View<const GlobalOrdinal*,
1443  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1444  Kokkos::MemoryUnmanaged>
1445  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1446  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1447  {
1448  using row_view_type = Kokkos::View<const GlobalOrdinal*,
1449  execution_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1450 
1451  if (rowinfo.allocSize == 0) { // nothing in the row to view
1452  return row_view_type ();
1453  }
1454  else { // the row has allocated slots
1455  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1456  const size_t start = rowinfo.offset1D;
1457  const size_t len = rowinfo.allocSize;
1458  const std::pair<size_t, size_t> rng (start, start + len);
1459  // mfh 23 Nov 2015: Don't just create a subview of
1460  // k_gblInds1D_ directly, because that first creates a
1461  // _managed_ subview, then returns an unmanaged version of
1462  // that. That touches the reference count, which costs
1463  // performance in a measurable way.
1464  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1465  }
1466  else {
1467  return row_view_type (); // k_gblInds1D_ is empty; nothing to view
1468  }
1469  }
1470  }
1471 
1472 
// Returns a read-only Teuchos::ArrayView over the allocated global-index
// slots of one row: rowinfo.allocSize entries starting at rowinfo.offset1D
// in k_gblInds1D_. Returns a default-constructed (empty) view if the row has
// no allocation or if k_gblInds1D_ is empty.
// NOTE(review): listing line 1475 (the class qualifier) is absent from this
// excerpt.
1473  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1474  Teuchos::ArrayView<const GlobalOrdinal>
1476  getGlobalView (const RowInfo& rowinfo) const
1477  {
1478  using GO = global_ordinal_type;
1479 
1480  Teuchos::ArrayView<const GO> view;
1481  if (rowinfo.allocSize > 0 && k_gblInds1D_.extent (0) != 0) {
1482  const auto rng =
1483  std::make_pair (rowinfo.offset1D,
1484  rowinfo.offset1D + rowinfo.allocSize);
1485  // mfh 23 Nov 2015: Don't just create a subview of
1486  // k_gblInds1D_ directly, because that first creates a
1487  // _managed_ subview, then returns an unmanaged version of
1488  // that. That touches the reference count, which costs
1489  // performance in a measurable way.
1490  using row_view_type = Kokkos::View<const GO*,
1491  execution_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1492  row_view_type k_gblInds1D_unmanaged = k_gblInds1D_;
1493  using Kokkos::Compat::getConstArrayView;
1494  using Kokkos::subview;
1495  view = getConstArrayView (subview (k_gblInds1D_unmanaged, rng));
1496  }
1497  return view;
1498  }
1499 
1500 
// Raw-pointer variant of the global row view.
//
// Outputs:
//   gblInds  - set to k_gblInds1D_.data() + rowInfo.offset1D, or nullptr if
//              the row has no allocation or k_gblInds1D_ is empty.
//   capacity - set to rowInfo.allocSize, or 0 in the nullptr case.
//
// Returns 0 on success. When debug_ is set and the requested range
// [offset1D, offset1D + allocSize) runs past the end of k_gblInds1D_,
// returns -1 and leaves the outputs at nullptr / 0.
// NOTE(review): listing line 1503 (the class qualifier) is absent from this
// excerpt.
1501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1502  LocalOrdinal
1504  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1505  LocalOrdinal& capacity,
1506  const RowInfo& rowInfo) const
1507  {
1508  gblInds = nullptr;
1509  capacity = 0;
1510 
1511  if (rowInfo.allocSize != 0 && k_gblInds1D_.extent (0) != 0) {
1512  if (debug_) {
// The bounds check is performed only in debug mode.
1513  if (rowInfo.offset1D + rowInfo.allocSize >
1514  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1515  return static_cast<LocalOrdinal> (-1);
1516  }
1517  }
1518  gblInds = k_gblInds1D_.data () + rowInfo.offset1D;
1519  capacity = rowInfo.allocSize;
1520  }
1521  return static_cast<LocalOrdinal> (0);
1522  }
1523 
1524 
// Returns a writable Teuchos::ArrayView over the allocated global-index
// slots of one row: rowinfo.allocSize entries starting at rowinfo.offset1D
// in k_gblInds1D_. Returns a default-constructed (empty) view if the row has
// no allocation or if k_gblInds1D_ is empty.
// NOTE(review): listing lines 1527-1528 (qualified function name and
// parameter list) are absent from this excerpt; the body reads a parameter
// named rowinfo.
1525  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1526  Teuchos::ArrayView<GlobalOrdinal>
1529  {
1530  using GO = global_ordinal_type;
1531 
1532  Teuchos::ArrayView<GO> view;
1533  if (rowinfo.allocSize > 0 && k_gblInds1D_.extent (0) != 0) {
1534  const auto rng =
1535  std::make_pair (rowinfo.offset1D,
1536  rowinfo.offset1D + rowinfo.allocSize);
1537  // mfh 23 Nov 2015: Don't just create a subview of
1538  // k_gblInds1D_ directly, because that first creates a
1539  // _managed_ subview, then returns an unmanaged version of
1540  // that. That touches the reference count, which costs
1541  // performance in a measurable way.
1542  using row_view_type = Kokkos::View<GO*, execution_space,
1543  Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1544  row_view_type k_gblInds1D_unmanaged = k_gblInds1D_;
1545  using Kokkos::Compat::getArrayView;
1546  using Kokkos::subview;
1547  view = getArrayView (subview (k_gblInds1D_unmanaged, rng));
1548  }
1549  return view;
1550  }
1551 
1552 
// Builds the RowInfo record for local row index myRow.
//
// Result, by case:
//   - rowMap_ is null, or myRow is not a local element of it:
//     localRow = offset1D = invalid size_t, allocSize = numEntries = 0.
//   - indices allocated: offset1D and allocSize come from k_rowPtrs_ (both 0
//     if k_rowPtrs_ is empty); numEntries is k_numRowEntries_(myRow), or
//     allocSize if k_numRowEntries_ is empty.
//   - indices not yet allocated: allocSize is the pending allocation hint
//     (k_numAllocPerRow_(myRow) if present, else numAllocForAllRows_),
//     numEntries = 0, offset1D = invalid size_t.
//
// NOTE(review): listing line 1555 (the class qualifier) is absent from this
// excerpt.
1553  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1554  RowInfo
1556  getRowInfo (const LocalOrdinal myRow) const
1557  {
1558  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1559  RowInfo ret;
1560  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1561  ret.localRow = STINV;
1562  ret.allocSize = 0;
1563  ret.numEntries = 0;
1564  ret.offset1D = STINV;
1565  return ret;
1566  }
1567 
1568  ret.localRow = static_cast<size_t> (myRow);
1569  if (this->indicesAreAllocated ()) {
1570  // Offsets tell us the allocation size in this case.
1571  if (this->k_rowPtrs_.extent (0) == 0) {
1572  ret.offset1D = 0;
1573  ret.allocSize = 0;
1574  }
1575  else {
1576  ret.offset1D = this->k_rowPtrs_(myRow);
1577  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1578  }
1579 
// With no per-row counts stored, the row is treated as full.
1580  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1581  ret.allocSize :
1582  this->k_numRowEntries_(myRow);
1583  }
1584  else { // haven't performed allocation yet; probably won't hit this code
1585  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1586  // allocate, rather than doing lazy allocation at first insert.
1587  // This will make k_numAllocPerRow_ obsolete.
1588  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1589  this->k_numAllocPerRow_(myRow) : // this is a host View
1590  this->numAllocForAllRows_;
1591  ret.numEntries = 0;
1592  ret.offset1D = STINV;
1593  }
1594 
1595  return ret;
1596  }
1597 
1598 
// Builds the RowInfo record for the row with global index gblRow.
//
// The global index is translated with rowMap_->getLocalElement. If rowMap_
// is null, or the translation yields the invalid LocalOrdinal, the result
// has localRow = offset1D = invalid size_t and allocSize = numEntries = 0.
// Otherwise the fields are filled in as follows:
//   - indices allocated: offset1D and allocSize come from k_rowPtrs_ (both 0
//     if k_rowPtrs_ is empty); numEntries is k_numRowEntries_(myRow), or
//     allocSize if k_numRowEntries_ is empty.
//   - indices not yet allocated: allocSize is the pending allocation hint
//     (k_numAllocPerRow_(myRow) if present, else numAllocForAllRows_),
//     numEntries = 0, offset1D = invalid size_t.
//
// NOTE(review): listing line 1601 (the class qualifier) is absent from this
// excerpt.
1599  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1600  RowInfo
1602  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1603  {
1604  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1605  RowInfo ret;
1606  if (this->rowMap_.is_null ()) {
1607  ret.localRow = STINV;
1608  ret.allocSize = 0;
1609  ret.numEntries = 0;
1610  ret.offset1D = STINV;
1611  return ret;
1612  }
1613  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1614  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1615  ret.localRow = STINV;
1616  ret.allocSize = 0;
1617  ret.numEntries = 0;
1618  ret.offset1D = STINV;
1619  return ret;
1620  }
1621 
1622  ret.localRow = static_cast<size_t> (myRow);
1623  if (this->indicesAreAllocated ()) {
1624  // graph data structures have the info that we need
1625  //
1626  // if static graph, offsets tell us the allocation size
1627  if (this->k_rowPtrs_.extent (0) == 0) {
1628  ret.offset1D = 0;
1629  ret.allocSize = 0;
1630  }
1631  else {
1632  ret.offset1D = this->k_rowPtrs_(myRow);
1633  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1634  }
1635 
// With no per-row counts stored, the row is treated as full.
1636  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1637  ret.allocSize :
1638  this->k_numRowEntries_(myRow);
1639  }
1640  else { // haven't performed allocation yet; probably won't hit this code
1641  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1642  // allocate, rather than doing lazy allocation at first insert.
1643  // This will make k_numAllocPerRow_ obsolete.
1644  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1645  this->k_numAllocPerRow_(myRow) : // this is a host View
1646  this->numAllocForAllRows_;
1647  ret.numEntries = 0;
1648  ret.offset1D = STINV;
1649  }
1650 
1651  return ret;
1652  }
1653 
1654 
// Checks the size relationships CrsGraph assumes among LocalOrdinal,
// GlobalOrdinal, size_t and global_size_t.
//
// The sizeof() relationships are enforced at compile time with
// static_assert. The relationships among the types' maximum values are
// checked at run time, and a violation raises std::runtime_error.
//
// NOTE(review): listing line 1657 (the class qualifier) is absent from this
// excerpt.
1655  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1656  void
1658  staticAssertions () const
1659  {
1660  using Teuchos::OrdinalTraits;
1661  typedef LocalOrdinal LO;
1662  typedef GlobalOrdinal GO;
1663  typedef global_size_t GST;
1664 
1665  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1666  // This is so that we can store local indices in the memory
1667  // formerly occupied by global indices.
1668  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1669  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1670  // Assumption: max(size_t) >= max(LocalOrdinal)
1671  // This is so that we can represent any LocalOrdinal as a size_t.
1672  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1673  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1674  static_assert (sizeof(GST) >= sizeof(size_t),
1675  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1676 
1677  // FIXME (mfh 30 Sep 2015) We're not using
1678  // Teuchos::CompileTimeAssert any more. Can we do these checks
1679  // with static_assert?
1680 
1681  // can't call max() with CompileTimeAssert, because it isn't a
1682  // constant expression; will need to make this a runtime check
1683  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1684  "given template arguments: size assumptions are not valid.";
// Run-time checks, in order: max(LO) <= max(size_t), max(LO) <= max(GO),
// max(GO) <= max(GST), and max(size_t) <= max(GST).
1685  TEUCHOS_TEST_FOR_EXCEPTION(
1686  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1687  std::runtime_error, msg);
1688  TEUCHOS_TEST_FOR_EXCEPTION(
1689  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1690  std::runtime_error, msg);
1691  TEUCHOS_TEST_FOR_EXCEPTION(
1692  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1693  std::runtime_error, msg);
1694  TEUCHOS_TEST_FOR_EXCEPTION(
1695  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1696  std::runtime_error, msg);
1697  }
1698 
1699 
// Appends column indices to one row, writing them directly after the row's
// current rowinfo.numEntries entries.
//
// Parameters:
//   rowinfo - the target row; its numEntries field is increased by the
//             number of indices written.
//   newInds - the input indices; .ginds is read when lg == GlobalIndices,
//             .linds when lg == LocalIndices.
//   lg      - the kind of the input indices.
//   I       - the kind of index to store.
//
// Supported (lg, I) combinations:
//   (Global, Global) - copy as is into the global-index row view.
//   (Global, Local)  - convert each index with colMap_->getLocalElement and
//                      store it in the local-index row view.
//   (Local,  Local)  - copy as is into the local-index row view.
//   (Local,  Global) - not implemented; raises std::logic_error.
//
// Also increments k_numRowEntries_(rowinfo.localRow) and marks the graph as
// locally modified. Returns the number of indices taken from the input.
// Capacity and bookkeeping checks run only when debug_ is set.
//
// NOTE(review): in the (Global, Local) case the result of getLocalElement is
// stored without checking for the invalid value -- confirm that callers
// guarantee every input index is in the column Map.
// NOTE(review): listing lines 1702-1703 (qualified function name and the
// first parameter, which the body uses as a mutable rowinfo) are absent from
// this excerpt.
1700  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1701  size_t
1704  const SLocalGlobalViews &newInds,
1705  const ELocalGlobal lg,
1706  const ELocalGlobal I)
1707  {
1708  using Teuchos::ArrayView;
1709  typedef LocalOrdinal LO;
1710  typedef GlobalOrdinal GO;
1711  const char tfecfFuncName[] = "insertIndices: ";
1712 
// oldNumEnt is only meaningful in debug mode; it feeds the consistency
// check at the end of this function.
1713  size_t oldNumEnt = 0;
1714  if (debug_) {
1715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1716  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1717  "lg must be either GlobalIndices or LocalIndices.");
1718  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1719  }
1720 
1721  size_t numNewInds = 0;
1722  if (lg == GlobalIndices) { // input indices are global
1723  ArrayView<const GO> new_ginds = newInds.ginds;
1724  numNewInds = new_ginds.size();
1725  if (I == GlobalIndices) { // store global indices
1726  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
1727  if (debug_) {
1728  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1729  (static_cast<size_t> (gind_view.size ()) <
1730  rowinfo.numEntries + numNewInds, std::logic_error,
1731  "gind_view.size() = " << gind_view.size ()
1732  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1733  << ") + numNewInds (= " << numNewInds << ").");
1734  }
1735  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
1736  for (size_t k = 0; k < numNewInds; ++k) {
1737  gblColInds_out[k] = new_ginds[k];
1738  }
1739  }
1740  else if (I == LocalIndices) { // store local indices
1741  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
1742  if (debug_) {
1743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1744  (static_cast<size_t> (lind_view.size ()) <
1745  rowinfo.numEntries + numNewInds, std::logic_error,
1746  "lind_view.size() = " << lind_view.size ()
1747  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1748  << ") + numNewInds (= " << numNewInds << ").");
1749  }
1750  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
1751  for (size_t k = 0; k < numNewInds; ++k) {
1752  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1753  }
1754  }
1755  }
1756  else if (lg == LocalIndices) { // input indices are local
1757  ArrayView<const LO> new_linds = newInds.linds;
1758  numNewInds = new_linds.size();
1759  if (I == LocalIndices) { // store local indices
1760  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
1761  if (debug_) {
1762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1763  (static_cast<size_t> (lind_view.size ()) <
1764  rowinfo.numEntries + numNewInds, std::logic_error,
1765  "lind_view.size() = " << lind_view.size ()
1766  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1767  << ") + numNewInds (= " << numNewInds << ").");
1768  }
1769  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
1770  for (size_t k = 0; k < numNewInds; ++k) {
1771  lclColInds_out[k] = new_linds[k];
1772  }
1773  }
1774  else if (I == GlobalIndices) {
1775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1776  (true, std::logic_error, "The case where the input indices are local "
1777  "and the indices to write are global (lg=LocalIndices, I="
1778  "GlobalIndices) is not implemented, because it does not make sense."
1779  << std::endl << "If you have correct local column indices, that "
1780  "means the graph has a column Map. In that case, you should be "
1781  "storing local indices.");
1782  }
1783  }
1784 
// Update both the caller's RowInfo and the graph's own per-row count.
1785  rowinfo.numEntries += numNewInds;
1786  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1787  this->setLocallyModified ();
1788 
1789  if (debug_) {
1790  const size_t chkNewNumEnt =
1791  this->getNumEntriesInLocalRow (rowinfo.localRow);
1792  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1793  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1794  "chkNewNumEnt = " << chkNewNumEnt
1795  << " != oldNumEnt (= " << oldNumEnt
1796  << ") + numNewInds (= " << numNewInds << ").");
1797  }
1798 
1799  return numNewInds;
1800  }
1801 
// Convenience overload: looks up the RowInfo for local row lclRow with
// getRowInfo and forwards to the overload that takes a RowInfo, returning
// its result.
// NOTE(review): listing line 1804 (the class qualifier) is absent from this
// excerpt.
1802  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1803  size_t
1805  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1806  const GlobalOrdinal inputGblColInds[],
1807  const size_t numInputInds)
1808  {
1809  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1810  inputGblColInds, numInputInds);
1811  }
1812 
// Inserts numInputInds global column indices into the row described by
// rowInfo, delegating the actual insertion to Details::insertCrsIndices.
//
// Parameters:
//   inputGblColInds - pointer to the input global column indices.
//   numInputInds    - number of input indices.
//   fun             - callback forwarded unchanged to insertCrsIndices; its
//                     contract is defined there and is not visible here.
//
// On success, adds the returned count to k_numRowEntries_(lclRow), marks the
// graph as locally modified, and returns that count. If insertCrsIndices
// returns the invalid size_t value, a diagnostic listing the input and the
// row's current indices is built and raised as std::runtime_error.
//
// NOTE(review): listing lines 1815-1816 (qualified function name and the
// rowInfo parameter), 1821, and 1853 (the initializer of maxNumToPrint) are
// absent from this excerpt.
1813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1814  size_t
1817  const GlobalOrdinal inputGblColInds[],
1818  const size_t numInputInds,
1819  std::function<void(const size_t, const size_t, const size_t)> fun)
1820  {
1822  using Kokkos::View;
1823  using Kokkos::subview;
1824  using Kokkos::MemoryUnmanaged;
1825  using Teuchos::ArrayView;
1826  using LO = LocalOrdinal;
1827  using GO = GlobalOrdinal;
1828  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1829  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1830 
// Wrap the raw input pointer in an unmanaged View so it can be handed to
// insertCrsIndices.
1831  auto numEntries = rowInfo.numEntries;
1832  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
1833  inp_view_type inputInds(inputGblColInds, numInputInds);
1834  size_t numInserted = Details::insertCrsIndices(lclRow, k_rowPtrs_,
1835  this->k_gblInds1D_, numEntries, inputInds, fun);
1836 
1837  const bool insertFailed =
1838  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1839  if(insertFailed) {
1840  constexpr size_t ONE (1);
1841  const int myRank = this->getComm()->getRank();
1842  std::ostringstream os;
1843 
1844  os << "Proc " << myRank << ": Not enough capacity to insert "
1845  << numInputInds
1846  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1847  << " into local row " << lclRow << ", which currently has "
1848  << rowInfo.numEntries
1849  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1850  << " and total allocation size " << rowInfo.allocSize
1851  << ". ";
1852  const size_t maxNumToPrint =
1854  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1855  numInputInds);
1856  verbosePrintArray(os, inputGblColIndsView, "Input global "
1857  "column indices", maxNumToPrint);
1858  os << ", ";
1859  const GO* const curGblColInds =
1860  k_gblInds1D_.data() + rowInfo.offset1D;
1861  ArrayView<const GO> curGblColIndsView(curGblColInds,
1862  rowInfo.numEntries);
1863  verbosePrintArray(os, curGblColIndsView, "Current global "
1864  "column indices", maxNumToPrint);
1865  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1866  (true, std::runtime_error, os.str());
1867  }
1868 
1869  this->k_numRowEntries_(lclRow) += numInserted;
1870  this->setLocallyModified();
1871  return numInserted;
1872  }
1873 
1874 
1875  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1876  void
1878  insertLocalIndicesImpl (const LocalOrdinal myRow,
1879  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1880  std::function<void(const size_t, const size_t, const size_t)> fun)
1881  {
1882  using Kokkos::MemoryUnmanaged;
1883  using Kokkos::subview;
1884  using Kokkos::View;
1885  using LO = LocalOrdinal;
1886  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1887 
1888  const RowInfo rowInfo = this->getRowInfo(myRow);
1889 
1890  size_t numNewInds = 0;
1891  size_t newNumEntries = 0;
1892 
1893  auto numEntries = rowInfo.numEntries;
1894  // Note: Teuchos::ArrayViews are in HostSpace
1895  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1896  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1897  auto numInserted = Details::insertCrsIndices(myRow, k_rowPtrs_,
1898  this->k_lclInds1D_, numEntries, inputInds, fun);
1899 
1900  const bool insertFailed =
1901  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1902  if(insertFailed) {
1903  constexpr size_t ONE (1);
1904  const size_t numInputInds(indices.size());
1905  const int myRank = this->getComm()->getRank();
1906  std::ostringstream os;
1907  os << "On MPI Process " << myRank << ": Not enough capacity to "
1908  "insert " << numInputInds
1909  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1910  << " into local row " << myRow << ", which currently has "
1911  << rowInfo.numEntries
1912  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1913  << " and total allocation size " << rowInfo.allocSize << ".";
1914  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1915  (true, std::runtime_error, os.str());
1916  }
1917  numNewInds = numInserted;
1918  newNumEntries = rowInfo.numEntries + numNewInds;
1919 
1920  this->k_numRowEntries_(myRow) += numNewInds;
1921  this->setLocallyModified ();
1922 
1923  if (debug_) {
1924  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1926  (chkNewNumEntries != newNumEntries, std::logic_error,
1927  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1928  << " != newNumEntries = " << newNumEntries
1929  << ". Please report this bug to the Tpetra developers.");
1930  }
1931  }
1932 
1933 
// Search one row of the graph for the given local column indices.
//
// `indices` is wrapped (not copied) in an unmanaged host View and handed
// to Details::findCrsIndices together with the row's offsets, its current
// entry count, and the callback `fun`.
//   - Locally indexed graph: the search runs directly on k_lclInds1D_.
//   - Globally indexed graph: each input local index is first translated
//     with the column Map's getGlobalElement(), then the search runs on
//     k_gblInds1D_.  Without a column Map the function returns
//     Teuchos::OrdinalTraits<size_t>::invalid().
//   - Neither state: returns 0 without searching.
//
// Returns the value produced by Details::findCrsIndices (presumably the
// number of matches, judging by the local name `numFound` -- confirm
// against that helper).
//
// NOTE(review): the listing omits the lines that carry the class
// qualifier, the function name and the first parameter (the body reads it
// as `rowInfo`).  By analogy with findGlobalIndices this is presumably
// `findLocalIndices(const RowInfo& rowInfo, ...)` -- confirm against the
// class declaration.
1934  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1935  size_t
1938  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1939  std::function<void(const size_t, const size_t, const size_t)> fun) const
1940  {
1941  using LO = LocalOrdinal;
1942  using inp_view_type = Kokkos::View<const LO*, Kokkos::HostSpace,
1943  Kokkos::MemoryUnmanaged>;
// Non-owning view over the caller's array; no data is copied.
1944  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1945 
1946  size_t numFound = 0;
1947  LO lclRow = rowInfo.localRow;
1948  if (this->isLocallyIndexed())
1949  {
// Stored indices are already local: compare directly.
1950  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1951  this->k_lclInds1D_, inputInds, fun);
1952  }
1953  else if (this->isGloballyIndexed())
1954  {
// Local input indices can only be translated through a column Map.
1955  if (this->colMap_.is_null())
1956  return Teuchos::OrdinalTraits<size_t>::invalid();
1957  const auto& colMap = *(this->colMap_);
// Translate each input local index to the global index stored in the row.
1958  auto map = [&](LO const lclInd){return colMap.getGlobalElement(lclInd);};
1959  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1960  this->k_gblInds1D_, inputInds, map, fun);
1961  }
1962  return numFound;
1963  }
1964 
1965 
1966  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1967  size_t
1969  findGlobalIndices(const RowInfo& rowInfo,
1970  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1971  std::function<void(const size_t, const size_t, const size_t)> fun) const
1972  {
1973  using GO = GlobalOrdinal;
1974  using Kokkos::View;
1975  using Kokkos::MemoryUnmanaged;
1976  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1977 
1978  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
1979  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1980 
1981  size_t numFound = 0;
1982  LocalOrdinal lclRow = rowInfo.localRow;
1983  if (this->isLocallyIndexed())
1984  {
1985  if (this->colMap_.is_null())
1986  return invalidCount;
1987  const auto& colMap = *(this->colMap_);
1988  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1989  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1990  this->k_lclInds1D_, inputInds, map, fun);
1991  }
1992  else if (this->isGloballyIndexed())
1993  {
1994  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1995  this->k_gblInds1D_, inputInds, fun);
1996  }
1997  return numFound;
1998  }
1999 
2000 
// Sort and/or remove duplicate local column indices in a single row.
//
// `sorted` and `merged` describe the row's state on entry: the row is
// sorted only if `sorted` is false, and duplicates are removed only if
// `merged` is false.  When duplicates are removed, the row's entry in
// k_numRowEntries_ is updated to the new count.
//
// Returns the number of duplicate entries removed; 0 if the row is empty,
// its entry count is the invalid() sentinel, or `merged` is true.
//
// NOTE(review): std::unique only collapses adjacent duplicates, so the
// merge step looks like it expects the row to be sorted by then (either
// `sorted` was true or the sort above ran) -- confirm callers never pass
// sorted=true for an unsorted row.
//
// NOTE(review): the listing omits the lines that carry the class
// qualifier, the function name and the first parameter (the body reads it
// as `rowInfo`) -- confirm the declarator against the class declaration.
2001  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2002  size_t
2005  const bool sorted,
2006  const bool merged)
2007  {
2008  const size_t origNumEnt = rowInfo.numEntries;
2009  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2010  origNumEnt != 0) {
2011  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2012 
// The standard algorithms below operate on the view's raw pointer.
2013  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2014  if (! sorted) {
2015  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2016  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2017  }
2018 
2019  if (! merged) {
2020  LocalOrdinal* const beg = lclColIndsRaw;
2021  LocalOrdinal* const end = beg + rowInfo.numEntries;
2022  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2023  LocalOrdinal* const newend = std::unique (beg, end);
// Entries in [newend, end) are leftovers; only the count is shrunk.
2024  const size_t newNumEnt = newend - beg;
2025 
2026  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2027  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2028  return origNumEnt - newNumEnt; // the number of duplicates in the row
2029  }
2030  else {
2031  return static_cast<size_t> (0); // assume no duplicates
2032  }
2033  }
2034  else {
2035  return static_cast<size_t> (0); // no entries in the row
2036  }
2037  }
2038 
2039 
2040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2041  void
2043  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2044  const Teuchos::RCP<const map_type>& rangeMap)
2045  {
2046  // simple pointer comparison for equality
2047  if (domainMap_ != domainMap) {
2048  domainMap_ = domainMap;
2049  importer_ = Teuchos::null;
2050  }
2051  if (rangeMap_ != rangeMap) {
2052  rangeMap_ = rangeMap;
2053  exporter_ = Teuchos::null;
2054  }
2055  }
2056 
2057 
// Mark the graph's cached global constants as not computed.
//
// Sets globalNumEntries_ and globalMaxNumRowEntries_ to the
// Teuchos::OrdinalTraits<global_size_t>::invalid() sentinel and clears
// haveGlobalConstants_.
//
// NOTE(review): the listing omits the lines that carry the class
// qualifier and the function name -- confirm the declarator against the
// class declaration.
2058  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2059  void
2062  {
2063  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2064 
2065  globalNumEntries_ = INV;
2066  globalMaxNumRowEntries_ = INV;
2067  haveGlobalConstants_ = false;
2068  }
2069 
2070 
2071  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2072  void
2075  {
2076  if (debug_) {
2077  using std::endl;
2078  const char tfecfFuncName[] = "checkInternalState: ";
2079  const char suffix[] = " Please report this bug to the Tpetra developers.";
2080 
2081  std::unique_ptr<std::string> prefix;
2082  if (verbose_) {
2083  prefix = this->createPrefix("CrsGraph", "checkInternalState");
2084  std::ostringstream os;
2085  os << *prefix << "Start" << endl;
2086  std::cerr << os.str();
2087  }
2088 
2089  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2090  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2091  // check the internal state of this data structure
2092  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2093  // always remains in a valid state
2094 
2095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2096  (this->rowMap_.is_null (), std::logic_error,
2097  "Row Map is null." << suffix);
2098  // This may access the row Map, so we need to check first (above)
2099  // whether the row Map is null.
2100  const LocalOrdinal lclNumRows =
2101  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2102 
2103  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2104  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2105  "Graph cannot be both fill active and fill complete." << suffix);
2106  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2107  (this->isFillComplete () &&
2108  (this->colMap_.is_null () ||
2109  this->rangeMap_.is_null () ||
2110  this->domainMap_.is_null ()),
2111  std::logic_error,
2112  "Graph is full complete, but at least one of {column, range, domain} "
2113  "Map is null." << suffix);
2114  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2115  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2116  std::logic_error, "Storage is optimized, but indices are not "
2117  "allocated, not even trivially." << suffix);
2118 
2119  size_t nodeAllocSize = 0;
2120  try {
2121  nodeAllocSize = this->getNodeAllocationSize ();
2122  }
2123  catch (std::logic_error& e) {
2124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2125  (true, std::runtime_error, "getNodeAllocationSize threw "
2126  "std::logic_error: " << e.what ());
2127  }
2128  catch (std::exception& e) {
2129  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2130  (true, std::runtime_error, "getNodeAllocationSize threw an "
2131  "std::exception: " << e.what ());
2132  }
2133  catch (...) {
2134  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2135  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2136  "not a subclass of std::exception.");
2137  }
2138 
2139  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2140  (this->isStorageOptimized () &&
2141  nodeAllocSize != this->getNodeNumEntries (),
2142  std::logic_error, "Storage is optimized, but "
2143  "this->getNodeAllocationSize() = " << nodeAllocSize
2144  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2145  << "." << suffix);
2146  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2147  (! this->haveGlobalConstants_ &&
2148  (this->globalNumEntries_ != GSTI ||
2149  this->globalMaxNumRowEntries_ != GSTI),
2150  std::logic_error, "Graph claims not to have global constants, but "
2151  "some of the global constants are not marked as invalid." << suffix);
2152  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2153  (this->haveGlobalConstants_ &&
2154  (this->globalNumEntries_ == GSTI ||
2155  this->globalMaxNumRowEntries_ == GSTI),
2156  std::logic_error, "Graph claims to have global constants, but "
2157  "some of them are marked as invalid." << suffix);
2158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2159  (this->haveGlobalConstants_ &&
2160  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2161  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2162  std::logic_error, "Graph claims to have global constants, and "
2163  "all of the values of the global constants are valid, but "
2164  "some of the local constants are greater than "
2165  "their corresponding global constants." << suffix);
2166  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2167  (this->indicesAreAllocated () &&
2168  (this->numAllocForAllRows_ != 0 ||
2169  this->k_numAllocPerRow_.extent (0) != 0),
2170  std::logic_error, "The graph claims that its indices are allocated, but "
2171  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2172  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2173  "the graph is supposed to release its \"allocation specifications\" "
2174  "when it allocates its indices." << suffix);
2175  if (isGloballyIndexed() && k_rowPtrs_.extent(0) != 0) {
2176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2177  (size_t(k_rowPtrs_.extent(0)) != size_t(lclNumRows + 1),
2178  std::logic_error, "The graph is globally indexed and "
2179  "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0)
2180  << ", but that size does not equal lclNumRows+1 = "
2181  << (lclNumRows+1) << "." << suffix);
2182  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2183  (k_rowPtrs_(lclNumRows) != size_t(k_gblInds1D_.extent(0)),
2184  std::logic_error, "The graph is globally indexed and "
2185  "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0)
2186  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2187  << k_rowPtrs_(lclNumRows) << " != k_gblInds1D_.extent(0)="
2188  << k_gblInds1D_.extent(0) << "." << suffix);
2189  }
2190  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2191  (this->isLocallyIndexed () &&
2192  this->k_rowPtrs_.extent (0) != 0 &&
2193  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2194  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2195  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2196  "the graph is locally indexed, then "
2197  "k_rowPtrs_ must have N+1 rows, and "
2198  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2199 
2200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2201  (this->indicesAreAllocated () &&
2202  nodeAllocSize > 0 &&
2203  this->k_lclInds1D_.extent (0) == 0 &&
2204  this->k_gblInds1D_.extent (0) == 0,
2205  std::logic_error, "Graph is allocated nontrivially, but "
2206  "but 1-D allocations are not present." << suffix);
2207 
2208  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2209  (! this->indicesAreAllocated () &&
2210  ((this->k_rowPtrs_.extent (0) != 0 ||
2211  this->k_numRowEntries_.extent (0) != 0) ||
2212  this->k_lclInds1D_.extent (0) != 0 ||
2213  this->k_gblInds1D_.extent (0) != 0),
2214  std::logic_error, "If indices are not allocated, "
2215  "then none of the buffers should be." << suffix);
2216  // indices may be local or global only if they are allocated
2217  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2218  // indicesAreGlobal_)
2219  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2220  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2221  ! this->indicesAreAllocated_,
2222  std::logic_error, "Indices may be local or global only if they are "
2223  "allocated." << suffix);
2224  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2225  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2226  std::logic_error, "Indices may not be both local and global." << suffix);
2227  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2228  (indicesAreLocal_ && k_gblInds1D_.extent (0) != 0,
2229  std::logic_error, "Indices are local, but "
2230  "k_gblInds1D_.extent(0) (= " << k_gblInds1D_.extent (0)
2231  << ") != 0. In other words, if indices are local, then "
2232  "allocations of global indices should not be present."
2233  << suffix);
2234  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2235  (indicesAreGlobal_ && k_lclInds1D_.extent (0) != 0,
2236  std::logic_error, "Indices are global, but "
2237  "k_lclInds1D_.extent(0) (= " << k_lclInds1D_.extent(0)
2238  << ") != 0. In other words, if indices are global, "
2239  "then allocations for local indices should not be present."
2240  << suffix);
2241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2242  (indicesAreLocal_ && nodeAllocSize > 0 &&
2243  k_lclInds1D_.extent (0) == 0 && getNodeNumRows () > 0,
2244  std::logic_error, "Indices are local and "
2245  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2246  "k_lclInds1D_.extent(0) = 0 and getNodeNumRows() = "
2247  << getNodeNumRows () << " > 0." << suffix);
2248  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2249  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2250  k_gblInds1D_.extent (0) == 0 && getNodeNumRows () > 0,
2251  std::logic_error, "Indices are global and "
2252  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2253  "k_gblInds1D_.extent(0) = 0 and getNodeNumRows() = "
2254  << getNodeNumRows () << " > 0." << suffix);
2255  // check the actual allocations
2256  if (this->indicesAreAllocated () &&
2257  this->k_rowPtrs_.extent (0) != 0) {
2258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2259  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2260  this->getNodeNumRows () + 1,
2261  std::logic_error, "Indices are allocated and "
2262  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2263  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2264  << (this->getNodeNumRows () + 1) << "." << suffix);
2265  const size_t actualNumAllocated =
2266  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2268  (this->isLocallyIndexed () &&
2269  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2270  std::logic_error, "Graph is locally indexed, indices are "
2271  "are allocated, and k_rowPtrs_ has nonzero length, but "
2272  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2273  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2274  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2275  (this->isGloballyIndexed () &&
2276  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2277  std::logic_error, "Graph is globally indexed, indices "
2278  "are allocated, and k_rowPtrs_ has nonzero length, but "
2279  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2280  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2281  }
2282 
2283  if (verbose_) {
2284  std::ostringstream os;
2285  os << *prefix << "Done" << endl;
2286  std::cerr << os.str();
2287  }
2288  }
2289  }
2290 
2291 
2292  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2293  size_t
2295  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2296  {
2297  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2298  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2299  return Teuchos::OrdinalTraits<size_t>::invalid ();
2300  }
2301  else {
2302  return rowInfo.numEntries;
2303  }
2304  }
2305 
2306 
2307  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2308  size_t
2310  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2311  {
2312  const RowInfo rowInfo = this->getRowInfo (localRow);
2313  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2314  return Teuchos::OrdinalTraits<size_t>::invalid ();
2315  }
2316  else {
2317  return rowInfo.numEntries;
2318  }
2319  }
2320 
2321 
2322  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2323  size_t
2325  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2326  {
2327  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2328  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2329  return Teuchos::OrdinalTraits<size_t>::invalid ();
2330  }
2331  else {
2332  return rowInfo.allocSize;
2333  }
2334  }
2335 
2336 
2337  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2338  size_t
2340  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2341  {
2342  const RowInfo rowInfo = this->getRowInfo (localRow);
2343  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2344  return Teuchos::OrdinalTraits<size_t>::invalid ();
2345  }
2346  else {
2347  return rowInfo.allocSize;
2348  }
2349  }
2350 
2351 
// Return the graph's row offsets (k_rowPtrs_) as a host
// Teuchos::ArrayRCP<const size_t>.
//
// Returns a null ArrayRCP if k_rowPtrs_ has zero length.  Otherwise:
//   - if the row-offset type is size_t, k_rowPtrs_ is deep-copied into
//     its host mirror view and that view is wrapped with persistingView;
//   - if not, the offsets are first converted into a size_t device View
//     with Details::copyOffsets, then mirrored to host and wrapped.
// With debug_ set, several null/size consistency checks throw
// std::logic_error.
//
// NOTE(review): the listing omits the lines that carry the class
// qualifier and the function name; the `prefix` string below names it
// getNodeRowPtrs -- confirm the declarator against the class declaration.
2352  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2353  Teuchos::ArrayRCP<const size_t>
2356  {
2357  using Kokkos::ViewAllocateWithoutInitializing;
2358  using Kokkos::create_mirror_view;
2359  using Teuchos::ArrayRCP;
2360  typedef typename local_graph_type::row_map_type row_map_type;
2361  typedef typename row_map_type::non_const_value_type row_offset_type;
2362  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2363  const char suffix[] = " Please report this bug to the Tpetra developers.";
2364 
2365  const size_t size = k_rowPtrs_.extent (0);
// Compile-time flag: can the stored offsets be returned without a type
// conversion?
2366  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2367 
2368  if (size == 0) {
2369  return ArrayRCP<const size_t> ();
2370  }
2371 
// Only one of these two is filled in, depending on `same`; the other
// stays null and is discarded by the if_c selection below.
2372  ArrayRCP<const row_offset_type> ptr_rot;
2373  ArrayRCP<const size_t> ptr_st;
2374  if (same) { // size_t == row_offset_type
2375  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2376  // of create_mirror_view might actually be a new allocation.
2377  // This helps with debugging when there are two memory spaces.
2378  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2379  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2380  if (debug_) {
2381  TEUCHOS_TEST_FOR_EXCEPTION
2382  (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2383  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2384  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2385  << k_rowPtrs_.extent (0) << ".");
2386  TEUCHOS_TEST_FOR_EXCEPTION
2387  (same && size != 0 && k_rowPtrs_.data () == nullptr, std::logic_error,
2388  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2389  << size << " != 0, but k_rowPtrs_.data() == nullptr." << suffix);
2390  TEUCHOS_TEST_FOR_EXCEPTION
2391  (same && size != 0 && ptr_h.data () == nullptr, std::logic_error,
2392  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2393  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2394  "== nullptr." << suffix);
2395  }
2396  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2397  }
2398  else { // size_t != row_offset_type
// Convert the offsets to size_t on the device, then copy them to host.
2399  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2400  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2401  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2402  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2403  Kokkos::deep_copy (ptr_h, ptr_d);
2404  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2405  }
2406  if (debug_) {
2407  TEUCHOS_TEST_FOR_EXCEPTION
2408  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2409  prefix << "size_t == row_offset_type and size = " << size
2410  << " != 0, but ptr_rot is null." << suffix);
2411  TEUCHOS_TEST_FOR_EXCEPTION
2412  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2413  prefix << "size_t != row_offset_type and size = " << size
2414  << " != 0, but ptr_st is null." << suffix);
2415  }
2416 
2417  // If size_t == row_offset_type, return a persisting host view of
2418  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
// The compile-time select keeps this assignment well-formed even when
// ptr_rot's element type is not size_t.
2419  ArrayRCP<const size_t> retval =
2420  Kokkos::Impl::if_c<same,
2421  ArrayRCP<const row_offset_type>,
2422  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2423  if (debug_) {
2424  TEUCHOS_TEST_FOR_EXCEPTION
2425  (size != 0 && retval.is_null (), std::logic_error,
2426  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2427  }
2428  return retval;
2429  }
2430 
2431 
// Return the graph's 1-D array of local column indices (k_lclInds1D_)
// wrapped as a Teuchos::ArrayRCP<const LocalOrdinal> through
// Kokkos::Compat::persistingView.
//
// NOTE(review): the listing omits the lines that carry the class
// qualifier and the function name -- confirm the declarator against the
// class declaration.
2432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2433  Teuchos::ArrayRCP<const LocalOrdinal>
2436  {
2437  return Kokkos::Compat::persistingView (k_lclInds1D_);
2438  }
2439 
2440 
2441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2442  void
2444  getLocalRowCopy (LocalOrdinal localRow,
2445  const Teuchos::ArrayView<LocalOrdinal>&indices,
2446  size_t& numEntries) const
2447  {
2448  using Teuchos::ArrayView;
2449  typedef LocalOrdinal LO;
2450  typedef GlobalOrdinal GO;
2451  const char tfecfFuncName[] = "getLocalRowCopy: ";
2452 
2453  TEUCHOS_TEST_FOR_EXCEPTION(
2454  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2455  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2456  "does not have a column Map yet. That means we don't have local indices "
2457  "for columns yet, so it doesn't make sense to call this method. If the "
2458  "graph doesn't have a column Map yet, you should call fillComplete on "
2459  "it first.");
2460 
2461  // This does the right thing (reports an empty row) if the input
2462  // row is invalid.
2463  const RowInfo rowinfo = this->getRowInfo (localRow);
2464  // No side effects on error.
2465  const size_t theNumEntries = rowinfo.numEntries;
2466  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2467  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2468  "Specified storage (size==" << indices.size () << ") does not suffice "
2469  "to hold all " << theNumEntries << " entry/ies for this row.");
2470  numEntries = theNumEntries;
2471 
2472  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2473  if (isLocallyIndexed ()) {
2474  ArrayView<const LO> lview = getLocalView (rowinfo);
2475  for (size_t j = 0; j < theNumEntries; ++j) {
2476  indices[j] = lview[j];
2477  }
2478  }
2479  else if (isGloballyIndexed ()) {
2480  ArrayView<const GO> gview = getGlobalView (rowinfo);
2481  for (size_t j = 0; j < theNumEntries; ++j) {
2482  indices[j] = colMap_->getLocalElement (gview[j]);
2483  }
2484  }
2485  }
2486  }
2487 
2488 
2489  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2490  void
2492  getGlobalRowCopy (GlobalOrdinal globalRow,
2493  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2494  size_t& numEntries) const
2495  {
2496  using Teuchos::ArrayView;
2497  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2498 
2499  // This does the right thing (reports an empty row) if the input
2500  // row is invalid.
2501  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2502  const size_t theNumEntries = rowinfo.numEntries;
2503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2504  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2505  "Specified storage (size==" << indices.size () << ") does not suffice "
2506  "to hold all " << theNumEntries << " entry/ies for this row.");
2507  numEntries = theNumEntries; // first side effect
2508 
2509  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2510  if (isLocallyIndexed ()) {
2511  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2512  for (size_t j = 0; j < theNumEntries; ++j) {
2513  indices[j] = colMap_->getGlobalElement (lview[j]);
2514  }
2515  }
2516  else if (isGloballyIndexed ()) {
2517  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
2518  for (size_t j = 0; j < theNumEntries; ++j) {
2519  indices[j] = gview[j];
2520  }
2521  }
2522  }
2523  }
2524 
2525 
2526  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2527  void
2529  getLocalRowView (const LocalOrdinal localRow,
2530  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2531  {
2532  const char tfecfFuncName[] = "getLocalRowView: ";
2533 
2534  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2535  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2536  "currently stored as global indices, so we cannot return a view with "
2537  "local column indices, whether or not the graph has a column Map. If "
2538  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2539 
2540  // This does the right thing (reports an empty row) if the input
2541  // row is invalid.
2542  const RowInfo rowInfo = getRowInfo (localRow);
2543  indices = Teuchos::null;
2544  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2545  rowInfo.numEntries > 0) {
2546  indices = this->getLocalView (rowInfo);
2547  // getLocalView returns a view of the _entire_ row, including
2548  // any extra space at the end (which 1-D unpacked storage
2549  // might have, for example). That's why we have to take a
2550  // subview of the returned view.
2551  indices = indices (0, rowInfo.numEntries);
2552  }
2553 
2554  if (debug_) {
2555  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2556  (static_cast<size_t> (indices.size ()) !=
2557  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2558  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
2559  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
2560  ". Please report this bug to the Tpetra developers.");
2561  }
2562  }
2563 
2564 
2565  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2566  void
2568  getGlobalRowView (const GlobalOrdinal globalRow,
2569  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2570  {
2571  const char tfecfFuncName[] = "getGlobalRowView: ";
2572 
2573  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2574  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2575  "currently stored as local indices, so we cannot return a view with "
2576  "global column indices. Use getGlobalRowCopy() instead.");
2577 
2578  // This does the right thing (reports an empty row) if the input
2579  // row is invalid.
2580  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2581  indices = Teuchos::null;
2582  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2583  rowInfo.numEntries > 0) {
2584  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2585  }
2586 
2587  if (debug_) {
2588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2589  (static_cast<size_t> (indices.size ()) !=
2590  getNumEntriesInGlobalRow (globalRow),
2591  std::logic_error, "indices.size() = " << indices.size ()
2592  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2593  << getNumEntriesInGlobalRow (globalRow)
2594  << ". Please report this bug to the Tpetra developers.");
2595  }
2596  }
2597 
2598 
2599  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2600  void
2602  insertLocalIndices (const LocalOrdinal localRow,
2603  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2604  {
2605  const char tfecfFuncName[] = "insertLocalIndices: ";
2606 
2607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2608  (! isFillActive (), std::runtime_error, "Fill must be active.");
2609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2610  (isGloballyIndexed (), std::runtime_error,
2611  "Graph indices are global; use insertGlobalIndices().");
2612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2613  (! hasColMap (), std::runtime_error,
2614  "Cannot insert local indices without a column Map.");
2615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2616  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2617  "Local row index " << localRow << " is not in the row Map "
2618  "on the calling process.");
2619  if (! indicesAreAllocated ()) {
2620  allocateIndices (LocalIndices, verbose_);
2621  }
2622 
2623  if (debug_) {
2624  // In debug mode, if the graph has a column Map, test whether any
2625  // of the given column indices are not in the column Map. Keep
2626  // track of the invalid column indices so we can tell the user
2627  // about them.
2628  if (hasColMap ()) {
2629  using Teuchos::Array;
2630  using Teuchos::toString;
2631  using std::endl;
2632  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2633 
2634  const map_type& colMap = *colMap_;
2635  Array<LocalOrdinal> badColInds;
2636  bool allInColMap = true;
2637  for (size_type k = 0; k < indices.size (); ++k) {
2638  if (! colMap.isNodeLocalElement (indices[k])) {
2639  allInColMap = false;
2640  badColInds.push_back (indices[k]);
2641  }
2642  }
2643  if (! allInColMap) {
2644  std::ostringstream os;
2645  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2646  "entries in owned row " << localRow << ", at the following column "
2647  "indices: " << toString (indices) << "." << endl;
2648  os << "Of those, the following indices are not in the column Map on "
2649  "this process: " << toString (badColInds) << "." << endl << "Since "
2650  "the graph has a column Map already, it is invalid to insert entries "
2651  "at those locations.";
2652  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2653  }
2654  }
2655  }
2656 
2657  insertLocalIndicesImpl (localRow, indices);
2658 
2659  if (debug_) {
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2661  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2662  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2663  "! isLocallyIndexed() is true. Please report this bug to the "
2664  "Tpetra developers.");
2665  }
2666  }
2667 
2668  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2669  void
2671  insertLocalIndices (const LocalOrdinal localRow,
2672  const LocalOrdinal numEnt,
2673  const LocalOrdinal inds[])
2674  {
2675  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2676  this->insertLocalIndices (localRow, indsT);
2677  }
2678 
2679 
2680  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2681  void
2683  insertGlobalIndices (const GlobalOrdinal gblRow,
2684  const LocalOrdinal numInputInds,
2685  const GlobalOrdinal inputGblColInds[])
2686  {
2687  typedef LocalOrdinal LO;
2688  const char tfecfFuncName[] = "insertGlobalIndices: ";
2689 
2690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2691  (this->isLocallyIndexed (), std::runtime_error,
2692  "graph indices are local; use insertLocalIndices().");
2693  // This can't really be satisfied for now, because if we are
2694  // fillComplete(), then we are local. In the future, this may
2695  // change. However, the rule that modification require active
2696  // fill will not change.
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2698  (! this->isFillActive (), std::runtime_error,
2699  "You are not allowed to call this method if fill is not active. "
2700  "If fillComplete has been called, you must first call resumeFill "
2701  "before you may insert indices.");
2702  if (! indicesAreAllocated ()) {
2703  allocateIndices (GlobalIndices, verbose_);
2704  }
2705  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2706  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2707  if (debug_) {
2708  if (this->hasColMap ()) {
2709  using std::endl;
2710  const map_type& colMap = * (this->colMap_);
2711  // In a debug build, keep track of the nonowned ("bad") column
2712  // indices, so that we can display them in the exception
2713  // message. In a release build, just ditch the loop early if
2714  // we encounter a nonowned column index.
2715  std::vector<GlobalOrdinal> badColInds;
2716  bool allInColMap = true;
2717  for (LO k = 0; k < numInputInds; ++k) {
2718  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2719  allInColMap = false;
2720  badColInds.push_back (inputGblColInds[k]);
2721  }
2722  }
2723  if (! allInColMap) {
2724  std::ostringstream os;
2725  os << "You attempted to insert entries in owned row " << gblRow
2726  << ", at the following column indices: [";
2727  for (LO k = 0; k < numInputInds; ++k) {
2728  os << inputGblColInds[k];
2729  if (k + static_cast<LO> (1) < numInputInds) {
2730  os << ",";
2731  }
2732  }
2733  os << "]." << endl << "Of those, the following indices are not in "
2734  "the column Map on this process: [";
2735  for (size_t k = 0; k < badColInds.size (); ++k) {
2736  os << badColInds[k];
2737  if (k + size_t (1) < badColInds.size ()) {
2738  os << ",";
2739  }
2740  }
2741  os << "]." << endl << "Since the matrix has a column Map already, "
2742  "it is invalid to insert entries at those locations.";
2743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2744  (true, std::invalid_argument, os.str ());
2745  }
2746  }
2747  } // debug_
2748  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2749  }
2750  else { // a nonlocal row
2751  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2752  numInputInds);
2753  }
2754  }
2755 
2756 
2757  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2758  void
2760  insertGlobalIndices (const GlobalOrdinal gblRow,
2761  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2762  {
2763  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2764  inputGblColInds.getRawPtr ());
2765  }
2766 
2767 
2768  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2769  void
2771  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2772  const GlobalOrdinal gblColInds[],
2773  const LocalOrdinal numGblColInds)
2774  {
2775  typedef LocalOrdinal LO;
2776  typedef GlobalOrdinal GO;
2777  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2778 
2779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2780  (this->isLocallyIndexed (), std::runtime_error,
2781  "Graph indices are local; use insertLocalIndices().");
2782  // This can't really be satisfied for now, because if we are
2783  // fillComplete(), then we are local. In the future, this may
2784  // change. However, the rule that modification require active
2785  // fill will not change.
2786  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2787  (! this->isFillActive (), std::runtime_error,
2788  "You are not allowed to call this method if fill is not active. "
2789  "If fillComplete has been called, you must first call resumeFill "
2790  "before you may insert indices.");
2791  if (! indicesAreAllocated ()) {
2792  allocateIndices (GlobalIndices, verbose_);
2793  }
2794 
2795  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2796  // If we have a column Map, use it to filter the entries.
2797  if (! colMap_.is_null ()) {
2798  const map_type& colMap = * (this->colMap_);
2799 
2800  LO curOffset = 0;
2801  while (curOffset < numGblColInds) {
2802  // Find a sequence of input indices that are in the column Map
2803  // on the calling process. Doing a sequence at a time,
2804  // instead of one at a time, amortizes some overhead.
2805  LO endOffset = curOffset;
2806  for ( ; endOffset < numGblColInds; ++endOffset) {
2807  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2808  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2809  break; // first entry, in current sequence, not in the column Map
2810  }
2811  }
2812  // curOffset, endOffset: half-exclusive range of indices in
2813  // the column Map on the calling process. If endOffset ==
2814  // curOffset, the range is empty.
2815  const LO numIndInSeq = (endOffset - curOffset);
2816  if (numIndInSeq != 0) {
2817  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2818  numIndInSeq);
2819  }
2820  // Invariant before this line: Either endOffset ==
2821  // numGblColInds, or gblColInds[endOffset] is not in the
2822  // column Map on the calling process.
2823  curOffset = endOffset + 1;
2824  }
2825  }
2826  else {
2827  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2828  gblColInds_av.size ());
2829  }
2830  }
2831 
2832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2833  void
2835  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2836  const GlobalOrdinal gblColInds[],
2837  const LocalOrdinal numGblColInds)
2838  {
2839  // This creates the std::vector if it doesn't exist yet.
2840  // std::map's operator[] does a lookup each time, so it's better
2841  // to pull nonlocals_[grow] out of the loop.
2842  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2843  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2844  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2845  // order to avoid duplicates. globalAssemble() sorts these
2846  // anyway.
2847  nonlocalRow.push_back (gblColInds[k]);
2848  }
2849  }
2850 
2851  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2852  void
2854  removeLocalIndices (LocalOrdinal lrow)
2855  {
2856  const char tfecfFuncName[] = "removeLocalIndices: ";
2857  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2858  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2860  isStorageOptimized (), std::runtime_error,
2861  "cannot remove indices after optimizeStorage() has been called.");
2862  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2863  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2865  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2866  "Local row " << lrow << " is not in the row Map on the calling process.");
2867  if (! indicesAreAllocated ()) {
2868  allocateIndices (LocalIndices, verbose_);
2869  }
2870 
2871  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
2872  // all processes?
2873  clearGlobalConstants ();
2874 
2875  if (k_numRowEntries_.extent (0) != 0) {
2876  this->k_numRowEntries_(lrow) = 0;
2877  }
2878 
2879  if (debug_) {
2880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2881  (getNumEntriesInLocalRow (lrow) != 0 ||
2882  ! indicesAreAllocated () ||
2883  ! isLocallyIndexed (), std::logic_error,
2884  "Violated stated post-conditions. Please contact Tpetra team.");
2885  }
2886  }
2887 
2888 
2889  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2890  void
2892  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
2893  const typename local_graph_type::entries_type::non_const_type& columnIndices)
2894  {
2895  const char tfecfFuncName[] = "setAllIndices: ";
2896  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2897  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2898  "The graph must have a column Map before you may call this method.");
2899  LocalOrdinal numLocalRows = this->getNodeNumRows ();
2900  {
2901  LocalOrdinal rowPtrLen = rowPointers.size();
2902  if(numLocalRows == 0) {
2903  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2904  rowPtrLen != 0 && rowPtrLen != 1,
2905  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2906  }
2907  else {
2908  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2909  rowPtrLen != numLocalRows + 1,
2910  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2911  " != this->getNodeNumRows()+1 = " << (numLocalRows + 1) << ".");
2912  }
2913  }
2914 
2915  if (debug_ && this->isSorted()) {
2916  // Verify that the local indices are actually sorted
2917  int notSorted = 0;
2918  using exec_space = typename local_graph_type::execution_space;
2919  using size_type = typename local_graph_type::size_type;
2920  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2921  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2922  {
2923  size_type rowBegin = rowPointers(i);
2924  size_type rowEnd = rowPointers(i + 1);
2925  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2926  {
2927  if(columnIndices(j - 1) > columnIndices(j))
2928  {
2929  lNotSorted = 1;
2930  }
2931  }
2932  }, notSorted);
2933  //All-reduce notSorted to avoid rank divergence
2934  int globalNotSorted = 0;
2935  auto comm = this->getComm();
2936  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2937  Teuchos::outArg (globalNotSorted));
2938  if (globalNotSorted)
2939  {
2940  std::string message;
2941  if (notSorted)
2942  {
2943  //Only print message from ranks with the problem
2944  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2945  }
2946  Details::gathervPrint(std::cout, message, *comm);
2947  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2948  }
2949  }
2950 
2951  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
2952  // since the future model will be allocation at construction, not
2953  // lazy allocation on first insert.
2954  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2955  ((this->k_lclInds1D_.extent (0) != 0 || this->k_gblInds1D_.extent (0) != 0),
2956  std::runtime_error, "You may not call this method if 1-D data "
2957  "structures are already allocated.");
2958 
2959  indicesAreAllocated_ = true;
2960  indicesAreLocal_ = true;
2961  indicesAreSorted_ = true;
2962  noRedundancies_ = true;
2963  k_lclInds1D_ = columnIndices;
2964  k_rowPtrs_ = rowPointers;
2965  // Storage MUST be packed, since the interface doesn't give any
2966  // way to indicate any extra space at the end of each row.
2967  storageStatus_ = Details::STORAGE_1D_PACKED;
2968 
2969  // Build the local graph.
2970  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
2971 
2972  // These normally get cleared out at the end of allocateIndices.
2973  // It makes sense to clear them out here, because at the end of
2974  // this method, the graph is allocated on the calling process.
2975  numAllocForAllRows_ = 0;
2976  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2977 
2978  checkInternalState ();
2979  }
2980 
2981 
2982  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2983  void
2985  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2986  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2987  {
2988  using Kokkos::View;
2989  typedef typename local_graph_type::row_map_type row_map_type;
2990  typedef typename row_map_type::array_layout layout_type;
2991  typedef typename row_map_type::non_const_value_type row_offset_type;
2992  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2993  Kokkos::MemoryUnmanaged> input_view_type;
2994  typedef typename row_map_type::non_const_type nc_row_map_type;
2995 
2996  const size_t size = static_cast<size_t> (rowPointers.size ());
2997  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2998  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2999 
3000  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3001 
3002  if (same) { // size_t == row_offset_type
3003  // This compile-time logic ensures that the compiler never sees
3004  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3005  // ...> unless size_t == row_offset_type.
3006  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3007  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3008  nc_row_map_type,
3009  input_view_type>::select (ptr_rot, ptr_decoy),
3010  ptr_in);
3011  }
3012  else { // size_t != row_offset_type
3013  // CudaUvmSpace != HostSpace, so this will be false in that case.
3014  constexpr bool inHostMemory =
3015  std::is_same<typename row_map_type::memory_space,
3016  Kokkos::HostSpace>::value;
3017  if (inHostMemory) {
3018  // Copy (with cast from size_t to row_offset_type, with bounds
3019  // checking if necessary) to ptr_rot.
3020  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3021  }
3022  else { // Copy input row offsets to device first.
3023  //
3024  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3025  // execution space would avoid the double copy.
3026  //
3027  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3028  Kokkos::deep_copy (ptr_st, ptr_in);
3029  // Copy on device (casting from size_t to row_offset_type,
3030  // with bounds checking if necessary) to ptr_rot. This
3031  // executes in the output View's execution space, which is the
3032  // same as execution_space.
3033  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3034  }
3035  }
3036 
3037  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3038  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3039  setAllIndices (ptr_rot, k_ind);
3040  }
3041 
3042 
3043  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3044  void
3046  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3047  size_t& boundForAllLocalRows,
3048  bool& boundSameForAllLocalRows) const
3049  {
3050  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3051  const char suffix[] = " Please report this bug to the Tpetra developers.";
3052 
3053  // The three output arguments. We assign them to the actual
3054  // output arguments at the end, in order to implement
3055  // transactional semantics.
3056  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3057  size_t numEntriesForAll = 0;
3058  bool allRowsSame = true;
3059 
3060  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3061 
3062  if (this->indicesAreAllocated ()) {
3063  if (this->isStorageOptimized ()) {
3064  // left with the case that we have optimized storage. in this
3065  // case, we have to construct a list of row sizes.
3066  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3067  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3068  "The graph has " << numRows << " (> 0) row"
3069  << (numRows != 1 ? "s" : "") << " on the calling process, "
3070  "but the k_rowPtrs_ array has zero entries." << suffix);
3071  Teuchos::ArrayRCP<size_t> numEnt;
3072  if (numRows != 0) {
3073  numEnt = Teuchos::arcp<size_t> (numRows);
3074  }
3075 
3076  // We have to iterate through the row offsets anyway, so we
3077  // might as well check whether all rows' bounds are the same.
3078  bool allRowsReallySame = false;
3079  for (ptrdiff_t i = 0; i < numRows; ++i) {
3080  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3081  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3082  allRowsReallySame = false;
3083  }
3084  }
3085  if (allRowsReallySame) {
3086  if (numRows == 0) {
3087  numEntriesForAll = 0;
3088  } else {
3089  numEntriesForAll = numEnt[1] - numEnt[0];
3090  }
3091  allRowsSame = true;
3092  }
3093  else {
3094  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3095  allRowsSame = false; // conservatively; we don't check the array
3096  }
3097  }
3098  else if (k_numRowEntries_.extent (0) != 0) {
3099  // This is a shallow copy; the ArrayRCP wraps the View in a
3100  // custom destructor, which ensures correct deallocation if
3101  // that is the only reference to the View. Furthermore, this
3102  // View is a host View, so this doesn't assume UVM.
3103  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3104  allRowsSame = false; // conservatively; we don't check the array
3105  }
3106  else {
3107  numEntriesForAll = 0;
3108  allRowsSame = true;
3109  }
3110  }
3111  else { // indices not allocated
3112  if (k_numAllocPerRow_.extent (0) != 0) {
3113  // This is a shallow copy; the ArrayRCP wraps the View in a
3114  // custom destructor, which ensures correct deallocation if
3115  // that is the only reference to the View. Furthermore, this
3116  // View is a host View, so this doesn't assume UVM.
3117  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3118  allRowsSame = false; // conservatively; we don't check the array
3119  }
3120  else {
3121  numEntriesForAll = numAllocForAllRows_;
3122  allRowsSame = true;
3123  }
3124  }
3125 
3126  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3127  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3128  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3129  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3130  "size " << numEntriesPerRow.size () << "." << suffix);
3131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3132  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3133  "numEntriesForAll and allRowsSame are not consistent. The former "
3134  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3135  << suffix);
3136  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3137  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3138  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3139  "nonzero length " << numEntriesForAll << ", but the latter is true."
3140  << suffix);
3141 
3142  boundPerLocalRow = numEntriesPerRow;
3143  boundForAllLocalRows = numEntriesForAll;
3144  boundSameForAllLocalRows = allRowsSame;
3145  }
3146 
3147 
3148  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3149  void
3152  {
3153  using Teuchos::Comm;
3154  using Teuchos::outArg;
3155  using Teuchos::RCP;
3156  using Teuchos::rcp;
3157  using Teuchos::REDUCE_MAX;
3158  using Teuchos::REDUCE_MIN;
3159  using Teuchos::reduceAll;
3160  using std::endl;
3161  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
3162  using LO = local_ordinal_type;
3163  using GO = global_ordinal_type;
3164  using size_type = typename Teuchos::Array<GO>::size_type;
3165  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3166 
3167  std::unique_ptr<std::string> prefix;
3168  if (verbose_) {
3169  prefix = this->createPrefix("CrsGraph", "globalAssemble");
3170  std::ostringstream os;
3171  os << *prefix << "Start" << endl;
3172  std::cerr << os.str();
3173  }
3174  RCP<const Comm<int> > comm = getComm ();
3175 
3176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3177  (! isFillActive (), std::runtime_error, "Fill must be active before "
3178  "you may call this method.");
3179 
3180  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3181 
3182  // If no processes have nonlocal rows, then we don't have to do
3183  // anything. Checking this is probably cheaper than constructing
3184  // the Map of nonlocal rows (see below) and noticing that it has
3185  // zero global entries.
3186  {
3187  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3188  int someoneHasNonlocalRows = 0;
3189  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3190  outArg (someoneHasNonlocalRows));
3191  if (someoneHasNonlocalRows == 0) {
3192  if (verbose_) {
3193  std::ostringstream os;
3194  os << *prefix << "Done: No nonlocal rows" << endl;
3195  std::cerr << os.str();
3196  }
3197  return;
3198  }
3199  else if (verbose_) {
3200  std::ostringstream os;
3201  os << *prefix << "At least 1 process has nonlocal rows"
3202  << endl;
3203  std::cerr << os.str();
3204  }
3205  }
3206 
3207  // 1. Create a list of the "nonlocal" rows on each process. this
3208  // requires iterating over nonlocals_, so while we do this,
3209  // deduplicate the entries and get a count for each nonlocal
3210  // row on this process.
3211  // 2. Construct a new row Map corresponding to those rows. This
3212  // Map is likely overlapping. We know that the Map is not
3213  // empty on all processes, because the above all-reduce and
3214  // return exclude that case.
3215 
3216  RCP<const map_type> nonlocalRowMap;
3217  // Keep this for CrsGraph's constructor.
3218  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3219  {
3220  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3221  size_type curPos = 0;
3222  for (auto mapIter = this->nonlocals_.begin ();
3223  mapIter != this->nonlocals_.end ();
3224  ++mapIter, ++curPos) {
3225  myNonlocalGblRows[curPos] = mapIter->first;
3226  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3227  std::sort (gblCols.begin (), gblCols.end ());
3228  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3229  gblCols.erase (vecLast, gblCols.end ());
3230  numEntPerNonlocalRow[curPos] = gblCols.size ();
3231  }
3232 
3233  // Currently, Map requires that its indexBase be the global min
3234  // of all its global indices. Map won't compute this for us, so
3235  // we must do it. If our process has no nonlocal rows, set the
3236  // "min" to the max possible GO value. This ensures that if
3237  // some process has at least one nonlocal row, then it will pick
3238  // that up as the min. We know that at least one process has a
3239  // nonlocal row, since the all-reduce and return at the top of
3240  // this method excluded that case.
3241  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3242  {
3243  auto iter = std::min_element (myNonlocalGblRows.begin (),
3244  myNonlocalGblRows.end ());
3245  if (iter != myNonlocalGblRows.end ()) {
3246  myMinNonlocalGblRow = *iter;
3247  }
3248  }
3249  GO gblMinNonlocalGblRow = 0;
3250  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3251  outArg (gblMinNonlocalGblRow));
3252  const GO indexBase = gblMinNonlocalGblRow;
3253  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3254  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3255  }
3256 
3257  if (verbose_) {
3258  std::ostringstream os;
3259  os << *prefix << "nonlocalRowMap->getIndexBase()="
3260  << nonlocalRowMap->getIndexBase() << endl;
3261  std::cerr << os.str();
3262  }
3263 
3264  // 3. Use the column indices for each nonlocal row, as stored in
3265  // nonlocals_, to construct a CrsGraph corresponding to
3266  // nonlocal rows. We need, but we have, exact counts of the
3267  // number of entries in each nonlocal row.
3268 
3269  RCP<crs_graph_type> nonlocalGraph =
3270  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow(),
3271  StaticProfile));
3272  {
3273  size_type curPos = 0;
3274  for (auto mapIter = this->nonlocals_.begin ();
3275  mapIter != this->nonlocals_.end ();
3276  ++mapIter, ++curPos) {
3277  const GO gblRow = mapIter->first;
3278  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3279  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3280  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3281  }
3282  }
3283  if (verbose_) {
3284  std::ostringstream os;
3285  os << *prefix << "Built nonlocal graph" << endl;
3286  std::cerr << os.str();
3287  }
3288  // There's no need to fill-complete the nonlocals graph.
3289  // We just use it as a temporary container for the Export.
3290 
3291  // 4. If the original row Map is one to one, then we can Export
3292  // directly from nonlocalGraph into this. Otherwise, we have
3293  // to create a temporary graph with a one-to-one row Map,
3294  // Export into that, then Import from the temporary graph into
3295  // *this.
3296 
3297  auto origRowMap = this->getRowMap ();
3298  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3299 
3300  if (origRowMapIsOneToOne) {
3301  if (verbose_) {
3302  std::ostringstream os;
3303  os << *prefix << "Original row Map is 1-to-1" << endl;
3304  std::cerr << os.str();
3305  }
3306  export_type exportToOrig (nonlocalRowMap, origRowMap);
3307  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3308  // We're done at this point!
3309  }
3310  else {
3311  if (verbose_) {
3312  std::ostringstream os;
3313  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3314  std::cerr << os.str();
3315  }
3316  // If you ask a Map whether it is one to one, it does some
3317  // communication and stashes intermediate results for later use
3318  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3319  // much more then the original cost of calling isOneToOne.
3320  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3321  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3322 
3323  // Create a temporary graph with the one-to-one row Map.
3324  //
3325  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3326  // row, to avoid reallocation during the Export operation.
3327  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3328 
3329  // Export from graph of nonlocals into the temp one-to-one graph.
3330  if (verbose_) {
3331  std::ostringstream os;
3332  os << *prefix << "Export nonlocal graph" << endl;
3333  std::cerr << os.str();
3334  }
3335  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3336 
3337  // We don't need the graph of nonlocals anymore, so get rid of
3338  // it, to keep the memory high-water mark down.
3339  nonlocalGraph = Teuchos::null;
3340 
3341  // Import from the one-to-one graph to the original graph.
3342  import_type importToOrig (oneToOneRowMap, origRowMap);
3343  if (verbose_) {
3344  std::ostringstream os;
3345  os << *prefix << "Import nonlocal graph" << endl;
3346  std::cerr << os.str();
3347  }
3348  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3349  }
3350 
3351  // It's safe now to clear out nonlocals_, since we've already
3352  // committed side effects to *this. The standard idiom for
3353  // clearing a Container like std::map, is to swap it with an empty
3354  // Container and let the swapped Container fall out of scope.
3355  decltype (this->nonlocals_) newNonlocals;
3356  std::swap (this->nonlocals_, newNonlocals);
3357 
3358  checkInternalState ();
3359  if (verbose_) {
3360  std::ostringstream os;
3361  os << *prefix << "Done" << endl;
3362  std::cerr << os.str();
3363  }
3364  }
3365 
3366 
3367  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3368  void
3370  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3371  {
3372  clearGlobalConstants();
3373  if (params != Teuchos::null) this->setParameterList (params);
3374  // either still sorted/merged or initially sorted/merged
3375  indicesAreSorted_ = true;
3376  noRedundancies_ = true;
3377  fillComplete_ = false;
3378  }
3379 
3380 
3381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3382  void
3384  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3385  {
3386  // If the graph already has domain and range Maps, don't clobber
3387  // them. If it doesn't, use the current row Map for both the
3388  // domain and range Maps.
3389  //
3390  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3391  // column Map, and column indices are inserted which are not in
3392  // the row Map on any process, this will cause troubles. However,
3393  // that is not a common case for most applications that we
3394  // encounter, and checking for it might require more
3395  // communication.
3396  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3397  if (domMap.is_null ()) {
3398  domMap = this->getRowMap ();
3399  }
3400  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3401  if (ranMap.is_null ()) {
3402  ranMap = this->getRowMap ();
3403  }
3404  this->fillComplete (domMap, ranMap, params);
3405  }
3406 
3407 
3408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3409  void
3411  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3412  const Teuchos::RCP<const map_type>& rangeMap,
3413  const Teuchos::RCP<Teuchos::ParameterList>& params)
3414  {
3415  using std::endl;
3416  const char tfecfFuncName[] = "fillComplete: ";
3417  const bool verbose = verbose_;
3418 
3419  std::unique_ptr<std::string> prefix;
3420  if (verbose) {
3421  prefix = this->createPrefix("CrsGraph", "fillComplete");
3422  std::ostringstream os;
3423  os << *prefix << "Start" << endl;
3424  std::cerr << os.str();
3425  }
3426 
3427  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3428  (! isFillActive () || isFillComplete (), std::runtime_error,
3429  "Graph fill state must be active (isFillActive() "
3430  "must be true) before calling fillComplete().");
3431 
3432  const int numProcs = getComm ()->getSize ();
3433 
3434  //
3435  // Read and set parameters
3436  //
3437 
3438  // Does the caller want to sort remote GIDs (within those owned by
3439  // the same process) in makeColMap()?
3440  if (! params.is_null ()) {
3441  if (params->isParameter ("sort column map ghost gids")) {
3442  sortGhostsAssociatedWithEachProcessor_ =
3443  params->get<bool> ("sort column map ghost gids",
3444  sortGhostsAssociatedWithEachProcessor_);
3445  }
3446  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3447  sortGhostsAssociatedWithEachProcessor_ =
3448  params->get<bool> ("Sort column Map ghost GIDs",
3449  sortGhostsAssociatedWithEachProcessor_);
3450  }
3451  }
3452 
3453  // If true, the caller promises that no process did nonlocal
3454  // changes since the last call to fillComplete.
3455  bool assertNoNonlocalInserts = false;
3456  if (! params.is_null ()) {
3457  assertNoNonlocalInserts =
3458  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3459  }
3460 
3461  //
3462  // Allocate indices, if they haven't already been allocated
3463  //
3464  if (! indicesAreAllocated ()) {
3465  if (hasColMap ()) {
3466  // We have a column Map, so use local indices.
3467  allocateIndices (LocalIndices, verbose);
3468  } else {
3469  // We don't have a column Map, so use global indices.
3470  allocateIndices (GlobalIndices, verbose);
3471  }
3472  }
3473 
3474  //
3475  // Do global assembly, if requested and if the communicator
3476  // contains more than one process.
3477  //
3478  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3479  if (mayNeedGlobalAssemble) {
3480  // This first checks if we need to do global assembly.
3481  // The check costs a single all-reduce.
3482  globalAssemble ();
3483  }
3484  else {
3485  const size_t numNonlocals = nonlocals_.size();
3486  if (verbose) {
3487  std::ostringstream os;
3488  os << *prefix << "Do not need to call globalAssemble; "
3489  "assertNoNonlocalInserts="
3490  << (assertNoNonlocalInserts ? "true" : "false")
3491  << "numProcs=" << numProcs
3492  << ", nonlocals_.size()=" << numNonlocals << endl;
3493  std::cerr << os.str();
3494  }
3495  const int lclNeededGlobalAssemble =
3496  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3497  if (lclNeededGlobalAssemble != 0 && verbose) {
3498  std::ostringstream os;
3499  os << *prefix;
3500  Details::Impl::verbosePrintMap(
3501  os, nonlocals_.begin(), nonlocals_.end(),
3502  nonlocals_.size(), "nonlocals_");
3503  std::cerr << os.str() << endl;
3504  }
3505 
3506  if (debug_) {
3507  auto map = this->getMap();
3508  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3509  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3510  if (! comm.is_null()) {
3511  using Teuchos::REDUCE_MAX;
3512  using Teuchos::reduceAll;
3513  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3514  Teuchos::outArg(gblNeededGlobalAssemble));
3515  }
3516  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3517  (gblNeededGlobalAssemble != 0, std::runtime_error,
3518  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3519  "least one process in the CrsGraph's communicator. This "
3520  "means either that you incorrectly set the "
3521  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3522  "or that you inserted invalid entries. "
3523  "Rerun with the environment variable TPETRA_VERBOSE="
3524  "CrsGraph set to see the entries of nonlocals_ on every "
3525  "MPI process (WARNING: lots of output).");
3526  }
3527  else {
3528  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3529  (lclNeededGlobalAssemble != 0, std::runtime_error,
3530  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3531  "calling process. This means either that you incorrectly "
3532  "set the \"No Nonlocal Changes\" fillComplete parameter "
3533  "to true, or that you inserted invalid entries. "
3534  "Rerun with the environment "
3535  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3536  "of nonlocals_ on every MPI process (WARNING: lots of "
3537  "output).");
3538  }
3539  }
3540 
3541  // Set domain and range Map. This may clear the Import / Export
3542  // objects if the new Maps differ from any old ones.
3543  setDomainRangeMaps (domainMap, rangeMap);
3544 
3545  // If the graph does not already have a column Map (either from
3546  // the user constructor calling the version of the constructor
3547  // that takes a column Map, or from a previous fillComplete call),
3548  // then create it.
3549  Teuchos::Array<int> remotePIDs (0);
3550  const bool mustBuildColMap = ! this->hasColMap ();
3551  if (mustBuildColMap) {
3552  this->makeColMap (remotePIDs); // resized on output
3553  }
3554 
3555  // Make indices local, if they aren't already.
3556  // The method doesn't do any work if the indices are already local.
3557  const std::pair<size_t, std::string> makeIndicesLocalResult =
3558  this->makeIndicesLocal(verbose);
3559  if (debug_) {
3560  using Details::gathervPrint;
3561  using Teuchos::RCP;
3562  using Teuchos::REDUCE_MIN;
3563  using Teuchos::reduceAll;
3564  using Teuchos::outArg;
3565 
3566  RCP<const map_type> map = this->getMap ();
3567  RCP<const Teuchos::Comm<int> > comm;
3568  if (! map.is_null ()) {
3569  comm = map->getComm ();
3570  }
3571  if (comm.is_null ()) {
3572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3573  (makeIndicesLocalResult.first != 0, std::runtime_error,
3574  makeIndicesLocalResult.second);
3575  }
3576  else {
3577  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3578  int gblSuccess = 0; // output argument
3579  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3580  if (gblSuccess != 1) {
3581  std::ostringstream os;
3582  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584  (true, std::runtime_error, os.str ());
3585  }
3586  }
3587  }
3588  else {
3589  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3590  // the error state to makeImportExport or
3591  // computeGlobalConstants, which may do all-reduces and thus may
3592  // have the opportunity to communicate that error state.
3593  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3594  (makeIndicesLocalResult.first != 0, std::runtime_error,
3595  makeIndicesLocalResult.second);
3596  }
3597 
3598  // If this process has no indices, then CrsGraph considers it
3599  // already trivially sorted and merged. Thus, this method need
3600  // not be called on all processes in the row Map's communicator.
3601  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3602 
3603  // Make Import and Export objects, if they haven't been made
3604  // already. If we made a column Map above, reuse information from
3605  // that process to avoid communication in the Import setup.
3606  this->makeImportExport (remotePIDs, mustBuildColMap);
3607 
3608  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3609  this->fillLocalGraph (params);
3610 
3611  const bool callComputeGlobalConstants = params.get () == nullptr ||
3612  params->get ("compute global constants", true);
3613  if (callComputeGlobalConstants) {
3614  this->computeGlobalConstants ();
3615  }
3616  else {
3617  this->computeLocalConstants ();
3618  }
3619  this->fillComplete_ = true;
3620  this->checkInternalState ();
3621 
3622  if (verbose) {
3623  std::ostringstream os;
3624  os << *prefix << "Done" << endl;
3625  std::cerr << os.str();
3626  }
3627  }
3628 
3629 
// expertStaticFillComplete: mark the graph fill complete using structure
// that the caller has already set up, skipping the assembly steps that
// fillComplete normally performs (allocateIndices, globalAssemble,
// makeColMap, makeIndicesLocal, sortAndMergeAllIndices).
//
// Checks performed below (each throws through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   - domainMap and rangeMap must be nonnull (std::runtime_error);
//   - the graph must not already be fill complete and must have a
//     column Map (std::runtime_error);
//   - k_rowPtrs_ must have exactly getNodeNumRows()+1 entries
//     (std::runtime_error);
//   - a nonnull importer must go from the domain Map to the column Map,
//     and a nonnull exporter from the row Map to the range Map
//     (std::invalid_argument).
//
// params may be null.  It is read for "Timer Label" (only when
// HAVE_TPETRA_MMM_TIMINGS is defined) and "compute global constants"
// (default true), and it is forwarded to fillLocalGraph.
3630  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3631  void
3633  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3634  const Teuchos::RCP<const map_type>& rangeMap,
3635  const Teuchos::RCP<const import_type>& importer,
3636  const Teuchos::RCP<const export_type>& exporter,
3637  const Teuchos::RCP<Teuchos::ParameterList>& params)
3638  {
3639  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3640 #ifdef HAVE_TPETRA_MMM_TIMINGS
      // MM holds the TimeMonitor for the current phase.  At every phase
      // boundary below it is reset to null and then re-created with the
      // next phase's timer name.
3641  std::string label;
3642  if(!params.is_null())
3643  label = params->get("Timer Label",label);
3644  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3645  using Teuchos::TimeMonitor;
3646  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3647 #endif
3648 
3649 
3650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3651  domainMap.is_null () || rangeMap.is_null (),
3652  std::runtime_error, "The input domain Map and range Map must be nonnull.");
      // NOTE(review): this also throws when the graph is already fill
      // complete, which the message text does not mention.
3653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3654  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3655  "call this method unless the graph has a column Map.");
3656  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3657  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
3658  std::runtime_error, "The calling process has getNodeNumRows() = "
3659  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3660  "been set.");
3661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3662  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
3663  std::runtime_error, "The row offsets array has length " <<
3664  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
3665  (getNodeNumRows () + 1) << ".");
3666 
3667  // Note: We don't need to do the following things which are normally done in fillComplete:
3668  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3669 
3670  // Constants from allocateIndices
3671  //
3672  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3673  // away once the graph is allocated. expertStaticFillComplete
3674  // either presumes that the graph is allocated, or "allocates" it.
3675  //
3676  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3677  // version of CrsGraph is to allocate in the constructor, not
3678  // lazily on first insert. That will make both
3679  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3680  numAllocForAllRows_ = 0;
3681  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3682  indicesAreAllocated_ = true;
3683 
3684  // Constants from makeIndicesLocal
3685  //
3686  // The graph has a column Map, so its indices had better be local.
3687  indicesAreLocal_ = true;
3688  indicesAreGlobal_ = false;
3689 
3690  // set domain/range map: may clear the import/export objects
3691 #ifdef HAVE_TPETRA_MMM_TIMINGS
3692  MM = Teuchos::null;
3693  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3694 #endif
3695  setDomainRangeMaps (domainMap, rangeMap);
3696 
3697  // Presume the user sorted and merged the arrays first
3698  indicesAreSorted_ = true;
3699  noRedundancies_ = true;
3700 
3701  // makeImportExport won't create a new importer/exporter if I set one here first.
3702 #ifdef HAVE_TPETRA_MMM_TIMINGS
3703  MM = Teuchos::null;
3704  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3705 #endif
3706 
      // Drop whatever Import / Export the graph currently holds.  Each is
      // replaced by the caller's object below only if that object is
      // nonnull and passes its Map-compatibility check.
3707  importer_ = Teuchos::null;
3708  exporter_ = Teuchos::null;
3709  if (importer != Teuchos::null) {
      // NOTE(review): the message starts with ": " although tfecfFuncName
      // (presumably prepended by the macro) already ends in ": ".
3710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3711  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3712  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3713  std::invalid_argument,": importer does not match matrix maps.");
3714  importer_ = importer;
3715 
3716  }
3717 
3718 #ifdef HAVE_TPETRA_MMM_TIMINGS
3719  MM = Teuchos::null;
3720  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3721 #endif
3722 
3723  if (exporter != Teuchos::null) {
      // NOTE(review): same leading ": " in the message as in the importer
      // check above.
3724  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3725  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3726  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3727  std::invalid_argument,": exporter does not match matrix maps.");
3728  exporter_ = exporter;
3729  }
3730 
3731 #ifdef HAVE_TPETRA_MMM_TIMINGS
3732  MM = Teuchos::null;
3733  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3734 #endif
3735  Teuchos::Array<int> remotePIDs (0); // unused output argument
3736  this->makeImportExport (remotePIDs, false);
3737 
3738 #ifdef HAVE_TPETRA_MMM_TIMINGS
3739  MM = Teuchos::null;
3740  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3741 #endif
3742  this->fillLocalGraph (params);
3743 
      // Global constants are computed unless params is nonnull and its
      // "compute global constants" entry is false; in that case only the
      // local constants are computed.
3744  const bool callComputeGlobalConstants = params.get () == nullptr ||
3745  params->get ("compute global constants", true);
3746 
3747  if (callComputeGlobalConstants) {
3748 #ifdef HAVE_TPETRA_MMM_TIMINGS
3749  MM = Teuchos::null;
3750  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3751 #endif // HAVE_TPETRA_MMM_TIMINGS
3752  this->computeGlobalConstants ();
3753  }
3754  else {
3755 #ifdef HAVE_TPETRA_MMM_TIMINGS
3756  MM = Teuchos::null;
3757  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3758 #endif // HAVE_TPETRA_MMM_TIMINGS
3759  this->computeLocalConstants ();
3760  }
3761 
3762  fillComplete_ = true;
3763 
3764 #ifdef HAVE_TPETRA_MMM_TIMINGS
3765  MM = Teuchos::null;
3766  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3767 #endif
3768  checkInternalState ();
3769  }
3770 
3771 
// fillLocalGraph: build lclGraph_ (a local_graph_type) from the graph's
// 1-D storage: row offsets in k_rowPtrs_ and local column indices in
// k_lclInds1D_.
//
// If getNodeNumEntries() differs from getNodeAllocationSize(), the
// storage is treated as unpacked: new packed row offsets are computed
// from k_numRowEntries_ and the column indices are copied into a new
// packed array.  Otherwise the existing arrays are used directly.
//
// params may be null.  If its "Optimize Storage" entry is true (the
// default, also used when params is null), the arrays handed to
// lclGraph_ also replace k_rowPtrs_ and k_lclInds1D_, k_numRowEntries_
// is reset to an empty View, and storageStatus_ becomes
// Details::STORAGE_1D_PACKED.
//
// All checks guarded by debug_ throw std::logic_error.  An exception
// escaping getNodeAllocationSize() is re-reported as std::logic_error or
// std::runtime_error (see the catch clauses below).
3772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3773  void
3775  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3776  {
3778  typedef decltype (k_numRowEntries_) row_entries_type;
3779  typedef typename local_graph_type::row_map_type row_map_type;
3780  typedef typename row_map_type::non_const_type non_const_row_map_type;
3781  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
3782  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3783  "expertStaticFillComplete): ";
3784  const size_t lclNumRows = this->getNodeNumRows ();
3785 
3786  // This method's goal is to fill in the two arrays (compressed
3787  // sparse row format) that define the sparse graph's structure.
3788  //
3789  // Use the nonconst version of row_map_type for ptr_d, because
3790  // the latter is const and we need to modify ptr_d here.
3791  non_const_row_map_type ptr_d;
3792  row_map_type ptr_d_const;
3793  lclinds_1d_type ind_d;
3794 
3795  bool requestOptimizedStorage = true;
3796  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3797  requestOptimizedStorage = false;
3798  }
3799 
3800  // The graph's column indices are currently stored in a 1-D
3801  // format, with row offsets in k_rowPtrs_ and local column indices
3802  // in k_lclInds1D_.
3803 
3804  if (debug_) {
3805  // The graph's array of row offsets must already be allocated.
3806  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3807  (k_rowPtrs_.extent (0) == 0, std::logic_error,
3808  "k_rowPtrs_ has size zero, but shouldn't");
3809  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3810  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
3811  "k_rowPtrs_.extent(0) = "
3812  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
3813  << (lclNumRows + 1) << ".");
      // The last row offset must equal the length of the column index
      // array.
      // NOTE(review): numOffsets cannot be zero here, because the first
      // check above already throws on a zero extent, so the
      // "numOffsets != 0" test is redundant.  The message also prints
      // k_rowPtrs_(numOffsets) although the entry read is at index
      // numOffsets - 1.
3814  const size_t numOffsets = k_rowPtrs_.extent (0);
3815  const auto valToCheck =
3816  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
3817  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3818  (numOffsets != 0 &&
3819  k_lclInds1D_.extent (0) != valToCheck,
3820  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3821  " and k_lclInds1D_.extent(0)=" << k_lclInds1D_.extent(0)
3822  << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3823  << ".");
3824  }
3825 
      // Ask for the allocation size.  Anything thrown by
      // getNodeAllocationSize() is caught and reported again with a
      // message naming that call.  std::logic_error keeps its type;
      // every other exception (including non-std ones) is reported as
      // std::runtime_error.
3826  size_t allocSize = 0;
3827  try {
3828  allocSize = this->getNodeAllocationSize ();
3829  }
3830  catch (std::logic_error& e) {
3831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3832  (true, std::logic_error, "getNodeAllocationSize threw "
3833  "std::logic_error: " << e.what ());
3834  }
3835  catch (std::runtime_error& e) {
3836  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3837  (true, std::runtime_error, "getNodeAllocationSize threw "
3838  "std::runtime_error: " << e.what ());
3839  }
3840  catch (std::exception& e) {
3841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3842  (true, std::runtime_error, "getNodeAllocationSize threw "
3843  "std::exception: " << e.what ());
3844  }
3845  catch (...) {
3846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3847  (true, std::runtime_error, "getNodeAllocationSize threw "
3848  "an exception not a subclass of std::exception.");
3849  }
3850 
3851  if (this->getNodeNumEntries () != allocSize) {
3852  // The graph's current 1-D storage is "unpacked." This means
3853  // the row offsets may differ from what the final row offsets
3854  // should be. This could happen, for example, if the user set
3855  // an upper bound on the number of entries in each row, but
3856  // didn't fill all those entries.
3857 
3858  if (debug_) {
3859  if (k_rowPtrs_.extent (0) != 0) {
3860  const size_t numOffsets =
3861  static_cast<size_t> (k_rowPtrs_.extent (0));
3862  const auto valToCheck =
3863  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
3864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3865  (valToCheck != size_t(k_lclInds1D_.extent(0)),
3866  std::logic_error, "(Unpacked branch) Before allocating "
3867  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3868  << valToCheck << " != k_lclInds1D_.extent(0)="
3869  << k_lclInds1D_.extent (0) << ".");
3870  }
3871  }
3872 
3873  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3874  // array of valid entry counts per row (k_numRowEntries_).
3875 
3876  // Total number of entries in the matrix on the calling
3877  // process. We will compute this in the loop below. It's
3878  // cheap to compute and useful as a sanity check.
3879  size_t lclTotalNumEntries = 0;
3880  {
3881  // Allocate the packed row offsets array.
3882  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
      // ptr_d_const is the const-typed handle that is later given to
      // lclGraph_; presumably it shares ptr_d's allocation (Kokkos::View
      // assignment), so the offsets written through ptr_d below are
      // visible through it -- TODO confirm.
3883  ptr_d_const = ptr_d;
3884 
3885  // It's ok that k_numRowEntries_ is a host View; the
3886  // function can handle this.
3887  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3888  if (debug_) {
3889  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3890  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3891  std::logic_error, "(Unpacked branch) "
3892  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3893  << " != getNodeNumRows()=" << lclNumRows << "");
3894  }
3895 
3896  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3897 
3898  if (debug_) {
3899  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3900  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3901  std::logic_error, "(Unpacked branch) After allocating "
3902  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3903  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3904  const auto valToCheck =
3905  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3906  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3907  (valToCheck != lclTotalNumEntries, std::logic_error,
3908  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3909  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3910  << ") = " << valToCheck << " != total number of entries "
3911  "on the calling process = " << lclTotalNumEntries
3912  << ".");
3913  }
3914  }
3915 
3916  // Allocate the array of packed column indices.
3917  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3918 
3919  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
3920  // them, using the packed row offsets array ptr_d that we
3921  // created above.
3922  //
3923  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3924  // CrsMatrix?), we need to keep around the unpacked row
3925  // offsets and column indices.
3926 
3927  // Pack the column indices from unpacked k_lclInds1D_ into
3928  // packed ind_d. We will replace k_lclInds1D_ below.
3929  typedef pack_functor<
3930  typename local_graph_type::entries_type::non_const_type,
3931  row_map_type> inds_packer_type;
3932  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
3933  {
      // Run the packing functor once per local row, in ind_d's
      // execution space.
3934  typedef typename decltype (ind_d)::execution_space exec_space;
3935  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3936  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3937  }
3938 
3939  if (debug_) {
      // NOTE(review): the messages below label this as the
      // "Optimize Storage"=true branch, but the enclosing condition is
      // getNodeNumEntries() != allocSize (the unpacked case), which does
      // not depend on requestOptimizedStorage.
3940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3941  (ptr_d.extent (0) == 0, std::logic_error,
3942  "(\"Optimize Storage\"=true branch) After packing, "
3943  "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
3944  "never allocated.");
3945  if (ptr_d.extent (0) != 0) {
3946  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3947  const auto valToCheck =
3948  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3949  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3950  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3951  std::logic_error, "(\"Optimize Storage\"=true branch) "
3952  "After packing, ptr_d(" << (numOffsets-1) << ")="
3953  << valToCheck << " != ind_d.extent(0)="
3954  << ind_d.extent(0) << ".");
3955  }
3956  }
3957  }
3958  else { // We don't have to pack, so just set the pointers.
3959  ptr_d_const = k_rowPtrs_;
3960  ind_d = k_lclInds1D_;
3961 
3962  if (debug_) {
      // NOTE(review): the messages below label this as the
      // "Optimize Storage"=false branch, but this branch is taken when
      // getNodeNumEntries() == allocSize, whatever the value of
      // requestOptimizedStorage.
3963  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3964  (ptr_d_const.extent (0) == 0, std::logic_error,
3965  "(\"Optimize Storage\"=false branch) "
3966  "ptr_d_const.extent(0) = 0. This probably means that "
3967  "k_rowPtrs_ was never allocated.");
3968  if (ptr_d_const.extent (0) != 0) {
3969  const size_t numOffsets =
3970  static_cast<size_t> (ptr_d_const.extent (0));
3971  const size_t valToCheck =
3972  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
3973  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3974  (valToCheck != size_t(ind_d.extent (0)),
3975  std::logic_error, "(\"Optimize Storage\"=false branch) "
3976  "ptr_d_const(" << (numOffsets-1) << ")=" << valToCheck
3977  << " != ind_d.extent(0)=" << ind_d.extent (0) << ".");
3978  }
3979  }
3980  }
3981 
      // Final consistency check, common to both branches: the offsets
      // array has lclNumRows+1 entries and its last entry equals the
      // number of column indices.
3982  if (debug_) {
3983  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3984  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
3985  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
3986  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3987  << ".");
3988  if (ptr_d_const.extent (0) != 0) {
3989  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
3990  const auto valToCheck =
3991  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
3992  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3993  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3994  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
3995  << ") = " << valToCheck << " != ind_d.extent(0) = "
3996  << ind_d.extent (0) << ".");
3997  }
3998  }
3999 
      // When requestOptimizedStorage is false, k_numRowEntries_,
      // k_rowPtrs_, k_lclInds1D_ and storageStatus_ are left as they
      // were; only lclGraph_ (below) receives ptr_d_const and ind_d.
4000  if (requestOptimizedStorage) {
4001  // With optimized storage, we don't need to store
4002  // the array of row entry counts.
4003 
4004  // Free graph data structures that are only needed for
4005  // unpacked 1-D storage.
4006  k_numRowEntries_ = row_entries_type ();
4007 
4008  // Keep the new 1-D packed allocations.
4009  k_rowPtrs_ = ptr_d_const;
4010  k_lclInds1D_ = ind_d;
4011 
4012  storageStatus_ = Details::STORAGE_1D_PACKED;
4013  }
4014 
4015  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4016 
4017  // Build the local graph.
4018  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4019  }
4020 
// replaceColMap: set the graph's column Map (colMap_) to newColMap.
//
// Throws std::runtime_error if the graph is locally indexed or globally
// indexed on the calling process.  No other member is updated here; in
// particular this method does not touch the stored column indices or
// importer_.
//
// NOTE(review): the error text ("Requires matching maps and non-static
// graph.") does not describe the condition that is actually tested
// (isLocallyIndexed() || isGloballyIndexed()).
4021  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4022  void
4024  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4025  {
4026  // NOTE: This safety check matches the code, but not the documentation of CrsGraph
4027  //
4028  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4029  // has no entries, because in that case, currently it is neither
4030  // locally nor globally indexed. This will change once we get rid
4031  // of lazy allocation (so that the constructor allocates indices
4032  // and therefore commits to local vs. global).
4033  const char tfecfFuncName[] = "replaceColMap: ";
4034  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4035  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4036  "Requires matching maps and non-static graph.");
4037  colMap_ = newColMap;
4038  }
4039 
// reindexColumns: install newColMap as the graph's column Map and, if
// the graph is locally indexed, rewrite its local column indices so that
// they refer to newColMap.
//
// Parameters:
//   newColMap            - the new column Map.
//   newImport            - if nonnull, stored as importer_ without
//                          validation.  If null and domainMap_ is
//                          nonnull, importer_ becomes a new Import from
//                          domainMap_ to newColMap, or null when the two
//                          Maps are the same (isSameAs).
//   sortIndicesInEachRow - if true and the graph is locally indexed,
//                          sortAndMergeAllIndices(false, true) is called
//                          after the new indices are committed.
//
// Throws std::runtime_error if the graph is fill complete, if the
// all-reduce reports that some process could not interpret its current
// column indices, or if some process holds a column index that is not in
// newColMap on that process.
4040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4041  void
4043  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4044  const Teuchos::RCP<const import_type>& newImport,
4045  const bool sortIndicesInEachRow)
4046  {
4047  using Teuchos::REDUCE_MIN;
4048  using Teuchos::reduceAll;
4049  using Teuchos::RCP;
4050  typedef GlobalOrdinal GO;
4051  typedef LocalOrdinal LO;
4052  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4053  const char tfecfFuncName[] = "reindexColumns: ";
4054 
4055  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4056  isFillComplete (), std::runtime_error, "The graph is fill complete "
4057  "(isFillComplete() returns true). You must call resumeFill() before "
4058  "you may call this method.");
4059 
4060  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4061  // doesn't claim to do the work of an Import or Export. This
4062  // means that for all processes, the calling process MUST own all
4063  // column indices, in both the old column Map (if it exists) and
4064  // the new column Map. We check this via an all-reduce.
4065  //
4066  // Some processes may be globally indexed, others may be locally
4067  // indexed, and others (that have no graph entries) may be
4068  // neither. This method will NOT change the graph's current
4069  // state. If it's locally indexed, it will stay that way, and
4070  // vice versa. It would be easy to add an option to convert indices
4071  // from global to local, so as to save a global-to-local
4072  // conversion pass. However, we don't do this here. The intended
4073  // typical use case is that the graph already has a column Map and
4074  // is locally indexed, and this is the case for which we optimize.
4075 
4076  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4077 
4078  // Attempt to convert indices to the new column Map's version of
4079  // local. This will fail if on the calling process, the graph has
4080  // indices that are not on that process in the new column Map.
4081  // After the local conversion attempt, we will do an all-reduce to
4082  // see if any processes failed.
4083 
4084  // If this is false, then either the graph contains a column index
4085  // which is invalid in the CURRENT column Map, or the graph is
4086  // locally indexed but currently has no column Map. In either
4087  // case, there is no way to convert the current local indices into
4088  // global indices, so that we can convert them into the new column
4089  // Map's local indices. It's possible for this to be true on some
4090  // processes but not others, due to replaceColMap.
4091  bool allCurColIndsValid = true;
4092  // On the calling process, are all valid current column indices
4093  // also in the new column Map on the calling process? In other
4094  // words, does local reindexing suffice, or should the user have
4095  // done an Import or Export instead?
4096  bool localSuffices = true;
4097 
4098  // Final arrays for the local indices. We will allocate exactly
4099  // one of these ONLY if the graph is locally indexed on the
4100  // calling process, and ONLY if the graph has one or more entries
4101  // (is not empty) on the calling process. In that case, we
4102  // allocate the first (1-D storage) if the graph has a static
4103  // profile, else we allocate the second (2-D storage).
      //
      // NOTE(review): in the code below only newLclInds1D is ever
      // allocated or read; newLclInds2D is declared but never used, so
      // the 2-D part of the comment above looks stale.
4104  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4105  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4106 
4107  // If indices aren't allocated, that means the calling process
4108  // owns no entries in the graph. Thus, there is nothing to
4109  // convert, and it trivially succeeds locally.
4110  if (indicesAreAllocated ()) {
4111  if (isLocallyIndexed ()) {
4112  if (hasColMap ()) { // locally indexed, and currently has a column Map
4113  const map_type& oldColMap = * (getColMap ());
4114  // Allocate storage for the new local indices.
4115  const size_t allocSize = this->getNodeAllocationSize ();
4116  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4117  // Attempt to convert the new indices locally.
      // NOTE(review): each "break" below leaves only the inner
      // (per-entry) loop; the outer loop still visits the remaining
      // rows.  The failure flags stay set, so the outcome is the same.
4118  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4119  const RowInfo rowInfo = this->getRowInfo (lclRow);
4120  const size_t beg = rowInfo.offset1D;
4121  const size_t end = beg + rowInfo.numEntries;
4122  for (size_t k = beg; k < end; ++k) {
4123  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4124  // use a DualView instead.
4125  const LO oldLclCol = k_lclInds1D_(k);
4126  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4127  allCurColIndsValid = false;
4128  break; // Stop at the first invalid index
4129  }
4130  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4131 
4132  // The above conversion MUST succeed. Otherwise, the
4133  // current local index is invalid, which means that
4134  // the graph was constructed incorrectly.
4135  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4136  allCurColIndsValid = false;
4137  break; // Stop at the first invalid index
4138  }
4139  else {
4140  const LO newLclCol = newColMap->getLocalElement (gblCol);
4141  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4142  localSuffices = false;
4143  break; // Stop at the first invalid index
4144  }
4145  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4146  // use a DualView instead.
4147  newLclInds1D(k) = newLclCol;
4148  }
4149  } // for each entry in the current row
4150  } // for each locally owned row
4151  }
4152  else { // locally indexed, but no column Map
4153  // This case is only possible if replaceColMap() was called
4154  // with a null argument on the calling process. It's
4155  // possible, but it means that this method can't possibly
4156  // succeed, since we have no way of knowing how to convert
4157  // the current local indices to global indices.
4158  allCurColIndsValid = false;
4159  }
4160  }
4161  else { // globally indexed
4162  // If the graph is globally indexed, we don't need to save
4163  // local indices, but we _do_ need to know whether the current
4164  // global indices are valid in the new column Map. We may
4165  // need to do a getRemoteIndexList call to find this out.
4166  //
4167  // In this case, it doesn't matter whether the graph currently
4168  // has a column Map. We don't need the old column Map to
4169  // convert from global indices to the _new_ column Map's local
4170  // indices. Furthermore, we can use the same code, whether
4171  // the graph is static or dynamic profile.
4172 
4173  // Test whether the current global indices are in the new
4174  // column Map on the calling process.
4175  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4176  const RowInfo rowInfo = this->getRowInfo (lclRow);
4177  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4178  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4179  const GO gblCol = oldGblRowView[k];
4180  if (! newColMap->isNodeGlobalElement (gblCol)) {
4181  localSuffices = false;
4182  break; // Stop at the first invalid index
4183  }
4184  } // for each entry in the current row
4185  } // for each locally owned row
4186  } // locally or globally indexed
4187  } // whether indices are allocated
4188 
4189  // Do an all-reduce to check both possible error conditions.
      // REDUCE_MIN over 0/1 flags: a result of 0 means at least one
      // participating process reported failure for that condition.
      // NOTE(review): gblSuccess starts at {0, 0} and is only written by
      // reduceAll, so when comm is null (null row Map) the first check
      // below throws whatever the local flags are -- confirm that this
      // is intended.
4190  int lclSuccess[2];
4191  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4192  lclSuccess[1] = localSuffices ? 1 : 0;
4193  int gblSuccess[2];
4194  gblSuccess[0] = 0;
4195  gblSuccess[1] = 0;
4196  RCP<const Teuchos::Comm<int> > comm =
4197  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4198  if (! comm.is_null ()) {
4199  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4200  }
4201 
4202  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4203  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4204  " The most likely reason is that the graph is locally indexed, but the "
4205  "column Map is missing (null) on some processes, due to a previous call "
4206  "to replaceColMap().");
4207 
4208  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4209  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4210  "contains column indices that are in the old column Map, but not in the "
4211  "new column Map (on that process). This method does NOT redistribute "
4212  "data; it does not claim to do the work of an Import or Export operation."
4213  " This means that for all processess, the calling process MUST own all "
4214  "column indices, in both the old column Map and the new column Map. In "
4215  "this case, you will need to do an Import or Export operation to "
4216  "redistribute data.");
4217 
4218  // Commit the results.
4219  if (isLocallyIndexed ()) {
4220  k_lclInds1D_ = newLclInds1D;
4221  // We've reindexed, so we don't know if the indices are sorted.
4222  //
4223  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4224  // since we're already going through all the indices above. We
4225  // could also sort each row in place; that way, we would only
4226  // have to make one pass over the rows.
4227  indicesAreSorted_ = false;
4228  if (sortIndicesInEachRow) {
4229  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4230  // order to call this method.
4231  //
4232  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4233  // guarantee. It would be better to sort the new index arrays
4234  // before committing them.
4235  const bool sorted = false; // need to resort
4236  const bool merged = true; // no need to merge, since no dups
4237  this->sortAndMergeAllIndices (sorted, merged);
4238  }
4239  }
4240  colMap_ = newColMap;
4241 
4242  if (newImport.is_null ()) {
4243  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4244  // check whether the input Import is null on any process.
4245  //
4246  // If the domain Map hasn't been set yet, we can't compute a new
4247  // Import object. Leave it what it is; it should be null, but
4248  // it doesn't matter. If the domain Map _has_ been set, then
4249  // compute a new Import object if necessary.
4250  if (! domainMap_.is_null ()) {
4251  if (! domainMap_->isSameAs (* newColMap)) {
4252  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4253  } else {
4254  importer_ = Teuchos::null; // don't need an Import
4255  }
4256  }
4257  } else {
4258  // The caller gave us an Import object. Assume that it's valid.
4259  importer_ = newImport;
4260  }
4261  }
4262 
4263 
4264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4265  void
4267  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4268  const Teuchos::RCP<const import_type>& newImporter)
4269  {
4270  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4271  TEUCHOS_TEST_FOR_EXCEPTION(
4272  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4273  "this method unless the graph already has a column Map.");
4274  TEUCHOS_TEST_FOR_EXCEPTION(
4275  newDomainMap.is_null (), std::invalid_argument,
4276  prefix << "The new domain Map must be nonnull.");
4277 
4278  if (debug_) {
4279  if (newImporter.is_null ()) {
4280  // It's not a good idea to put expensive operations in a macro
4281  // clause, even if they are side effect - free, because macros
4282  // don't promise that they won't evaluate their arguments more
4283  // than once. It's polite for them to do so, but not required.
4284  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4285  TEUCHOS_TEST_FOR_EXCEPTION
4286  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4287  "then the new domain Map must be the same as the current column Map.");
4288  }
4289  else {
4290  const bool colSameAsTgt =
4291  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4292  const bool newDomSameAsSrc =
4293  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4294  TEUCHOS_TEST_FOR_EXCEPTION
4295  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4296  "new Import is nonnull, then the current column Map must be the same "
4297  "as the new Import's target Map, and the new domain Map must be the "
4298  "same as the new Import's source Map.");
4299  }
4300  }
4301 
4302  domainMap_ = newDomainMap;
4303  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4304  }
4305 
// Accessor whose body simply returns the lclGraph_ member.
4306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4310  {
4311  return lclGraph_;
4312  }
4313 
// Compute the graph's global entry count and global maximum number of
// entries per row by all-reducing the corresponding per-process values.
//
// Always calls computeLocalConstants() first.  The two all-reduces only
// run while haveGlobalConstants_ is false; the flag is set afterwards,
// so later calls skip them.
4314  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4315  void
4318  {
4319  using ::Tpetra::Details::ProfilingRegion;
4320  using Teuchos::ArrayView;
4321  using Teuchos::outArg;
4322  using Teuchos::reduceAll;
4323  typedef global_size_t GST;
4324 
4325  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4326 
4327  this->computeLocalConstants ();
4328 
4329  // Compute global constants from local constants. Processes that
4330  // already have local constants still participate in the
4331  // all-reduces, using their previously computed values.
4332  if (! this->haveGlobalConstants_) {
// NOTE(review): the communicator is dereferenced without a null check.
4333  const Teuchos::Comm<int>& comm = * (this->getComm ());
4334  // Promote all the nodeNum* and nodeMaxNum* quantities from
4335  // size_t to global_size_t, when doing the all-reduces for
4336  // globalNum* / globalMaxNum* results.
4337  //
4338  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4339  // this in two all-reduces (one for the sum and the other for
4340  // the max), or use a custom MPI_Op that combines the sum and
4341  // the max. The latter might even be slower than two
4342  // all-reduces on modern network hardware. It would also be a
4343  // good idea to use nonblocking all-reduces (MPI 3), so that we
4344  // don't have to wait around for the first one to finish before
4345  // starting the second one.
4346  GST lcl, gbl;
4347  lcl = static_cast<GST> (this->getNodeNumEntries ());
4348 
// First all-reduce: sum of the per-process entry counts.
4349  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4350  this->globalNumEntries_ = gbl;
4351 
// Second all-reduce: maximum of the per-process maximum row lengths.
4352  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4353  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4354  outArg (this->globalMaxNumRowEntries_));
4355  this->haveGlobalConstants_ = true;
4356  }
4357  }
4358 
4359 
// Compute the per-process maximum number of entries in any row and
// store it in nodeMaxNumRowEntries_.
//
// Returns immediately if haveLocalConstants_ is already set.  Otherwise
// it fences the execution space, derives the local row count from the
// length of lclGraph_.row_map (length minus one, or zero when empty),
// takes the result of Details::maxDifference over the row offsets, and
// sets haveLocalConstants_.
4360  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4361  void
4364  {
4365  using ::Tpetra::Details::ProfilingRegion;
4366 
4367  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4368  if (this->haveLocalConstants_) {
4369  return;
4370  }
4371 
4372  // Reset local properties
4373  this->nodeMaxNumRowEntries_ =
4374  Teuchos::OrdinalTraits<size_t>::invalid();
4375 
4376  using LO = local_ordinal_type;
4377 
4378  // KJ: This one is a bit different from the above. Conservatively thinking,
4379  // we also need the fence here as lclGraph_.row_map is on UVM and it can be
4380  // still updated. In practice, the local graph construction should be done
4381  // before this is called. This routine is computeLocalConstants. If we want
4382  // a better code, we need a flag stating that the local graph is completed
4383  // and safe to use it without fence.
4384  // For now, I recommend to put the fence. Defining the state of local
4385  // object can be improvements in the code.
4386  execution_space().fence ();
4387 
4388  auto ptr = this->lclGraph_.row_map;
// An offsets array of length zero means zero rows; otherwise it has
// one more entry than there are rows.
4389  const LO lclNumRows = ptr.extent(0) == 0 ?
4390  static_cast<LO> (0) :
4391  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4392 
// NOTE(review): presumably maxDifference returns the largest gap between
// consecutive offsets, i.e. the longest row -- confirm against its definition.
4393  const LO lclMaxNumRowEnt =
4394  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4395  ptr, lclNumRows);
4396  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4397  this->haveLocalConstants_ = true;
4398  }
4399 
4400 
// Convert the graph's column indices from global to local, using the
// graph's column Map.
//
// Parameter:
//   verbose  If true, print progress messages to std::cerr.
//
// Returns a pair:
//   first   Number of column indices (counting duplicates) that could
//           not be converted because they are not in the column Map on
//           this process; or the invalid size_t value if the row offsets
//           array is unexpectedly empty (early-return path below).
//   second  Human-readable error text; empty if nothing went wrong.
//
// Throws std::logic_error if the graph has no column Map, or if
// hasColMap() is true but getColMap() is null.
//
// On the normal path the method always finishes by rebuilding lclGraph_
// from k_lclInds1D_ and k_rowPtrs_, marking the graph locally indexed,
// and calling checkInternalState().
4401  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4402  std::pair<size_t, std::string>
4404  makeIndicesLocal (const bool verbose)
4405  {
4407  using Teuchos::arcp;
4408  using Teuchos::Array;
4409  using std::endl;
4410  typedef LocalOrdinal LO;
4411  typedef GlobalOrdinal GO;
4412  typedef device_type DT;
4413  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4414  typedef decltype (k_numRowEntries_) row_entries_type;
4415  typedef typename row_entries_type::non_const_value_type num_ent_type;
4416  typedef typename local_graph_type::entries_type::non_const_type
4417  lcl_col_inds_type;
4418  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4419  device_type> gbl_col_inds_type;
4420  const char tfecfFuncName[] = "makeIndicesLocal: ";
4421  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4422 
// prefix is only created (and only dereferenced) when verbose is true.
4423  std::unique_ptr<std::string> prefix;
4424  if (verbose) {
4425  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4426  std::ostringstream os;
4427  os << *prefix << "lclNumRows: " << getNodeNumRows() << endl;
4428  std::cerr << os.str();
4429  }
4430 
4431  // These are somewhat global properties, so it's safe to have
4432  // exception checks for them, rather than returning an error code.
4433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4434  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4435  "column Map yet. This method should never be called in that case. "
4436  "Please report this bug to the Tpetra developers.");
4437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4438  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4439  "that it has a column Map, because hasColMap() returns true. However, "
4440  "the result of getColMap() is null. This should never happen. Please "
4441  "report this bug to the Tpetra developers.");
4442 
4443  // Return value 1: The number of column indices (counting
4444  // duplicates) that could not be converted to local indices,
4445  // because they were not in the column Map on the calling process.
4446  size_t lclNumErrs = 0;
4447  std::ostringstream errStrm; // for return value 2 (error string)
4448 
4449  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4450  const map_type& colMap = * (this->getColMap ());
4451 
// Conversion is skipped entirely when the graph is not globally
// indexed or this process owns no rows.
4452  if (this->isGloballyIndexed () && lclNumRows != 0) {
4453  // This is a host-accessible View.
4454  typename row_entries_type::const_type h_numRowEnt =
4455  this->k_numRowEntries_;
4456 
4457  // Allocate space for local indices.
4458  // If GO and LO are the same size, we can reuse the existing
4459  // array of 1-D index storage to convert column indices from
4460  // GO to LO. Otherwise, we'll just allocate a new buffer.
// NOTE(review): the test below is std::is_same (same type), which is
// stricter than the "same size" wording in the comment above.
4461  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
4462  if (LO_GO_same) {
4463  // This prevents a build error (illegal assignment) if
4464  // LO_GO_same is _not_ true. Only the first branch
4465  // (returning k_gblInds1D_) should ever get taken.
4466  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
4468  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
4469  }
4470  else {
4471  if (k_rowPtrs_.extent (0) == 0) {
4472  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4473  "happen here. Please report this bug to the Tpetra developers."
4474  << endl;
4475  // Need to return early.
// This path returns before lclGraph_ and the indexing flags are updated.
4476  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4477  errStrm.str ());
4478  }
// The last row offset is used as the size of the new local-index array.
4479  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
4480 
4481  // mfh 17 Dec 2016: We don't need initial zero-fill of
4482  // k_lclInds1D_, because we will fill it below anyway.
4483  // AllowPadding would only help for aligned access (e.g.,
4484  // for vectorization) if we also were to pad each row to the
4485  // same alignment, so we'll skip AllowPadding for now.
4486 
4487  // using Kokkos::AllowPadding;
4488  using Kokkos::view_alloc;
4489  using Kokkos::WithoutInitializing;
4490 
4491  // When giving the label as an argument to
4492  // Kokkos::view_alloc, the label must be a string and not a
4493  // char*, else the code won't compile. This is because
4494  // view_alloc also allows a raw pointer as its first
4495  // argument. See
4496  // https://github.com/kokkos/kokkos/issues/434. This is a
4497  // large allocation typically, so the overhead of creating
4498  // an std::string is minor.
4499  const std::string label ("Tpetra::CrsGraph::lclind");
4500  if (verbose) {
4501  std::ostringstream os;
4502  os << *prefix << "(Re)allocate k_lclInds1D_: old="
4503  << k_lclInds1D_.extent(0) << ", new=" << numEnt << endl;
4504  std::cerr << os.str();
4505  }
4506  k_lclInds1D_ =
4507  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
4508  }
4509 
4510  auto lclColMap = colMap.getLocalMap ();
4511  // This is a "device mirror" of the host View h_numRowEnt.
4512  //
4513  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4514  // Device instance is to use its default constructor. See the
4515  // following Kokkos issue:
4516  //
4517  // https://github.com/kokkos/kokkos/issues/442
4518  if (verbose) {
4519  std::ostringstream os;
4520  os << *prefix << "Allocate device mirror k_numRowEnt: "
4521  << h_numRowEnt.extent(0) << endl;
4522  std::cerr << os.str();
4523  }
// NOTE(review): no explicit deep_copy from h_numRowEnt into this mirror
// is visible here; confirm the mirror's contents are valid when the
// device memory space differs from the host's.
4524  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
4525 
// The conversion's result is stored as the count of unconvertible indices.
4527  lclNumErrs =
4528  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
4529  k_gblInds1D_,
4530  k_rowPtrs_,
4531  lclColMap,
4532  k_numRowEnt);
4533  if (lclNumErrs != 0) {
// Rank is only needed for the error text; falls back to 0 when the
// Map or its communicator is null.
4534  const int myRank = [this] () {
4535  auto map = this->getMap ();
4536  if (map.is_null ()) {
4537  return 0;
4538  }
4539  else {
4540  auto comm = map->getComm ();
4541  return comm.is_null () ? 0 : comm->getRank ();
4542  }
4543  } ();
4544  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
// NOTE(review): the "do"/"does" suffix below looks inverted -- it yields
// "indices that does not" and "index that do not".
4545  errStrm << "(Process " << myRank << ") When converting column "
4546  "indices from global to local, we encountered " << lclNumErrs
4547  << " ind" << (pluralNumErrs ? "ices" : "ex")
4548  << " that do" << (pluralNumErrs ? "es" : "")
4549  << " not live in the column Map on this process." << endl;
4550  }
4551 
4552  // We've converted column indices from global to local, so we
4553  // can deallocate the global column indices (which we know are
4554  // in 1-D storage, because the graph has static profile).
4555  if (verbose) {
4556  std::ostringstream os;
4557  os << *prefix << "Free k_gblInds1D_: "
4558  << k_gblInds1D_.extent(0) << endl;
4559  std::cerr << os.str();
4560  }
4561  k_gblInds1D_ = gbl_col_inds_type ();
4562  } // globallyIndexed() && lclNumRows > 0
4563 
// These state updates run even when no conversion was needed above.
4564  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
4565  this->indicesAreLocal_ = true;
4566  this->indicesAreGlobal_ = false;
4567  this->checkInternalState ();
4568 
4569  return std::make_pair (lclNumErrs, errStrm.str ());
4570  }
4571 
4572  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4573  void
4575  makeColMap (Teuchos::Array<int>& remotePIDs)
4576  {
4578  using std::endl;
4579  const char tfecfFuncName[] = "makeColMap";
4580 
4581  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4582  std::unique_ptr<std::string> prefix;
4583  if (verbose_) {
4584  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4585  std::ostringstream os;
4586  os << *prefix << "Start" << endl;
4587  std::cerr << os.str();
4588  }
4589 
4590  // this->colMap_ should be null at this point, but we accept the
4591  // future possibility that it might not be (esp. if we decide
4592  // later to support graph structure changes after first
4593  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4594  // 2017) support).
4595  Teuchos::RCP<const map_type> colMap = this->colMap_;
4596  const bool sortEachProcsGids =
4597  this->sortGhostsAssociatedWithEachProcessor_;
4598 
4599  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4600  // per-process error code. If an error does occur on a process,
4601  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4602  // notice that error. This is the caller's responsibility. For
4603  // now, we only propagate (to all processes) and report the error
4604  // in debug mode. In the future, we need to add the local/global
4605  // error handling scheme used in BlockCrsMatrix to this class.
4606  if (debug_) {
4607  using Teuchos::outArg;
4608  using Teuchos::REDUCE_MIN;
4609  using Teuchos::reduceAll;
4610 
4611  std::ostringstream errStrm;
4612  const int lclErrCode =
4613  Details::makeColMap (colMap, remotePIDs,
4614  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4615  auto comm = this->getComm ();
4616  if (! comm.is_null ()) {
4617  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4618  int gblSuccess = 0; // output argument
4619  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4620  outArg (gblSuccess));
4621  if (gblSuccess != 1) {
4622  std::ostringstream os;
4623  Details::gathervPrint (os, errStrm.str (), *comm);
4624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4625  (true, std::runtime_error, ": An error happened on at "
4626  "least one process in the CrsGraph's communicator. "
4627  "Here are all processes' error messages:" << std::endl
4628  << os.str ());
4629  }
4630  }
4631  }
4632  else {
4633  (void) Details::makeColMap (colMap, remotePIDs,
4634  getDomainMap (), *this, sortEachProcsGids, nullptr);
4635  }
4636  // See above. We want to admit the possibility of makeColMap
4637  // actually revising an existing column Map, even though that
4638  // doesn't currently (as of 10 May 2017) happen.
4639  this->colMap_ = colMap;
4640 
4641  checkInternalState ();
4642  if (verbose_) {
4643  std::ostringstream os;
4644  os << *prefix << "Done" << endl;
4645  std::cerr << os.str();
4646  }
4647  }
4648 
4649 
4650  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4651  void
4653  sortAndMergeAllIndices (const bool sorted, const bool merged)
4654  {
4655  using std::endl;
4656  using LO = LocalOrdinal;
4657  using host_execution_space =
4658  typename Kokkos::View<LO*, device_type>::HostMirror::
4659  execution_space;
4660  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4661  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4662  Details::ProfilingRegion regionSortAndMerge
4663  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4664 
4665  std::unique_ptr<std::string> prefix;
4666  if (verbose_) {
4667  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4668  std::ostringstream os;
4669  os << *prefix << "Start: "
4670  << "sorted=" << (sorted ? "true" : "false")
4671  << ", merged=" << (merged ? "true" : "false") << endl;
4672  std::cerr << os.str();
4673  }
4674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4675  (this->isGloballyIndexed(), std::logic_error,
4676  "This method may only be called after makeIndicesLocal." );
4677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4678  (! merged && this->isStorageOptimized(), std::logic_error,
4679  "The graph is already storage optimized, so we shouldn't be "
4680  "merging any indices. "
4681  "Please report this bug to the Tpetra developers.");
4682 
4683  if (! sorted || ! merged) {
4684  const LO lclNumRows(this->getNodeNumRows());
4685  auto range = range_type(0, lclNumRows);
4686 
4687  // FIXME (mfh 08 May 2017) Loops below assume CUDA UVM.
4688  if (verbose_) {
4689  size_t totalNumDups = 0;
4690  Kokkos::parallel_reduce(range,
4691  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4692  {
4693  const RowInfo rowInfo = this->getRowInfo(lclRow);
4694  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4695  },
4696  totalNumDups);
4697  std::ostringstream os;
4698  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4699  std::cerr << os.str();
4700  }
4701  else {
4702  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
4703  Kokkos::parallel_for(range,
4704  [this, sorted, merged] (const LO lclRow)
4705  {
4706  const RowInfo rowInfo = this->getRowInfo(lclRow);
4707  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4708  });
4709  }
4710  this->indicesAreSorted_ = true; // we just sorted every row
4711  this->noRedundancies_ = true; // we just merged every row
4712  }
4713 
4714  if (verbose_) {
4715  std::ostringstream os;
4716  os << *prefix << "Done" << endl;
4717  std::cerr << os.str();
4718  }
4719  }
4720 
4721  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4722  void
4724  makeImportExport (Teuchos::Array<int>& remotePIDs,
4725  const bool useRemotePIDs)
4726  {
4727  using ::Tpetra::Details::ProfilingRegion;
4728  using Teuchos::ParameterList;
4729  using Teuchos::RCP;
4730  using Teuchos::rcp;
4731  const char tfecfFuncName[] = "makeImportExport: ";
4732  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4733 
4734  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4735  (! this->hasColMap (), std::logic_error,
4736  "This method may not be called unless the graph has a column Map.");
4737  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4738 
4739  // Don't do any checks to see if we need to create the Import, if
4740  // it exists already.
4741  //
4742  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4743  // change CrsGraph in the future to allow changing the column
4744  // Map after fillComplete. For now, the column Map is fixed
4745  // after the first fillComplete call.
4746  if (importer_.is_null ()) {
4747  // Create the Import instance if necessary.
4748  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4749  if (params.is_null () || ! params->isSublist ("Import")) {
4750  if (useRemotePIDs) {
4751  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4752  }
4753  else {
4754  importer_ = rcp (new import_type (domainMap_, colMap_));
4755  }
4756  }
4757  else {
4758  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4759  if (useRemotePIDs) {
4760  RCP<import_type> newImp =
4761  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4762  importSublist));
4763  importer_ = newImp;
4764  }
4765  else {
4766  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4767  }
4768  }
4769  }
4770  }
4771 
4772  // Don't do any checks to see if we need to create the Export, if
4773  // it exists already.
4774  if (exporter_.is_null ()) {
4775  // Create the Export instance if necessary.
4776  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4777  if (params.is_null () || ! params->isSublist ("Export")) {
4778  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4779  }
4780  else {
4781  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4782  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4783  }
4784  }
4785  }
4786  }
4787 
4788 
// Build a one-line summary of the graph.
//
// Starts with dist_object_type::description() and appends a status
// block.  The column count and entry count are only included when the
// graph is fill complete; otherwise only the global row count is
// reported.
4789  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4790  std::string
4793  {
4794  std::ostringstream oss;
4795  oss << dist_object_type::description ();
4796  if (isFillComplete ()) {
4797  oss << "{status = fill complete"
4798  << ", global rows = " << getGlobalNumRows()
4799  << ", global cols = " << getGlobalNumCols()
4800  << ", global num entries = " << getGlobalNumEntries()
4801  << "}";
4802  }
4803  else {
4804  oss << "{status = fill not complete"
4805  << ", global rows = " << getGlobalNumRows()
4806  << "}";
4807  }
4808  return oss.str();
4809  }
4810 
4811 
4812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4813  void
4815  describe (Teuchos::FancyOStream &out,
4816  const Teuchos::EVerbosityLevel verbLevel) const
4817  {
4818  using Teuchos::ArrayView;
4819  using Teuchos::Comm;
4820  using Teuchos::RCP;
4821  using Teuchos::VERB_DEFAULT;
4822  using Teuchos::VERB_NONE;
4823  using Teuchos::VERB_LOW;
4824  using Teuchos::VERB_MEDIUM;
4825  using Teuchos::VERB_HIGH;
4826  using Teuchos::VERB_EXTREME;
4827  using std::endl;
4828  using std::setw;
4829 
4830  Teuchos::EVerbosityLevel vl = verbLevel;
4831  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4832  RCP<const Comm<int> > comm = this->getComm();
4833  const int myImageID = comm->getRank(),
4834  numImages = comm->getSize();
4835  size_t width = 1;
4836  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4837  ++width;
4838  }
4839  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4840  Teuchos::OSTab tab (out);
4841  // none: print nothing
4842  // low: print O(1) info from node 0
4843  // medium: print O(P) info, num entries per node
4844  // high: print O(N) info, num entries per row
4845  // extreme: print O(NNZ) info: print graph indices
4846  //
4847  // for medium and higher, print constituent objects at specified verbLevel
4848  if (vl != VERB_NONE) {
4849  if (myImageID == 0) out << this->description() << std::endl;
4850  // O(1) globals, minus what was already printed by description()
4851  if (isFillComplete() && myImageID == 0) {
4852  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4853  }
4854  // constituent objects
4855  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4856  if (myImageID == 0) out << "\nRow map: " << std::endl;
4857  rowMap_->describe(out,vl);
4858  if (colMap_ != Teuchos::null) {
4859  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4860  colMap_->describe(out,vl);
4861  }
4862  if (domainMap_ != Teuchos::null) {
4863  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4864  domainMap_->describe(out,vl);
4865  }
4866  if (rangeMap_ != Teuchos::null) {
4867  if (myImageID == 0) out << "\nRange map: " << std::endl;
4868  rangeMap_->describe(out,vl);
4869  }
4870  }
4871  // O(P) data
4872  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4873  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4874  if (myImageID == imageCtr) {
4875  out << "Node ID = " << imageCtr << std::endl
4876  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
4877  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4878  if (! indicesAreAllocated ()) {
4879  out << "Indices are not allocated." << std::endl;
4880  }
4881  }
4882  comm->barrier();
4883  comm->barrier();
4884  comm->barrier();
4885  }
4886  }
4887  // O(N) and O(NNZ) data
4888  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4889  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4890  if (myImageID == imageCtr) {
4891  out << std::setw(width) << "Node ID"
4892  << std::setw(width) << "Global Row"
4893  << std::setw(width) << "Num Entries";
4894  if (vl == VERB_EXTREME) {
4895  out << " Entries";
4896  }
4897  out << std::endl;
4898  const LocalOrdinal lclNumRows =
4899  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4900  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4901  const RowInfo rowinfo = this->getRowInfo (r);
4902  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4903  out << std::setw(width) << myImageID
4904  << std::setw(width) << gid
4905  << std::setw(width) << rowinfo.numEntries;
4906  if (vl == VERB_EXTREME) {
4907  out << " ";
4908  if (isGloballyIndexed()) {
4909  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
4910  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
4911  }
4912  else if (isLocallyIndexed()) {
4913  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
4914  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
4915  }
4916  }
4917  out << std::endl;
4918  }
4919  }
4920  comm->barrier();
4921  comm->barrier();
4922  comm->barrier();
4923  }
4924  }
4925  }
4926  }
4927 
4928 
4929  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4930  bool
4932  checkSizes (const SrcDistObject& /* source */)
4933  {
4934  // It's not clear what kind of compatibility checks on sizes can
4935  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4936  // compatibility.
4937  return true;
4938  }
4939 
// Copy rows from the source graph into this graph.
//
// Parameters:
//   source           Source object; cast to row_graph_type below.
//   numSameIDs       Number of leading rows copied under the same local
//                    index (the "copy" part).
//   permuteToLIDs    Target local row indices for the "permute" part.
//   permuteFromLIDs  Source local row indices for the "permute" part;
//                    must have the same length as permuteToLIDs.
//
// Row padding is computed and applied first.  Each row's global column
// indices are then inserted with insertGlobalIndices.  Rows are read by
// copy when the source is fill complete or is not a CrsGraph, and by
// view otherwise.
//
// Throws std::runtime_error if the two permute arrays differ in length.
4940  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4941  void
4944  (const SrcDistObject& source,
4945  const size_t numSameIDs,
4946  const Kokkos::DualView<const local_ordinal_type*,
4947  buffer_device_type>& permuteToLIDs,
4948  const Kokkos::DualView<const local_ordinal_type*,
4949  buffer_device_type>& permuteFromLIDs)
4950  {
4951  using std::endl;
4952  using LO = local_ordinal_type;
4953  using GO = global_ordinal_type;
4954  using this_type = CrsGraph<LO, GO, node_type>;
4955  using row_graph_type = RowGraph<LO, GO, node_type>;
4956  const char tfecfFuncName[] = "copyAndPermute: ";
4957  const bool verbose = verbose_;
4958 
4959  std::unique_ptr<std::string> prefix;
4960  if (verbose) {
4961  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4962  std::ostringstream os;
4963  os << *prefix << endl;
4964  std::cerr << os.str ();
4965  }
4966 
4967  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4968  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4969  std::runtime_error, "permuteToLIDs.extent(0) = "
4970  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4971  << permuteFromLIDs.extent (0) << ".");
4972 
4973  // We know from checkSizes that the source object is a
4974  // row_graph_type, so we don't need to check again.
// NOTE(review): checkSizes in this file returns true unconditionally, so
// nothing has verified the type; this reference cast throws std::bad_cast
// if source is not a row_graph_type.
4975  const row_graph_type& srcRowGraph =
4976  dynamic_cast<const row_graph_type&> (source);
4977 
4978  if (verbose) {
4979  std::ostringstream os;
4980  os << *prefix << "Compute padding" << endl;
4981  std::cerr << os.str ();
4982  }
4983  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4984  permuteToLIDs, permuteFromLIDs, verbose);
4985  applyCrsPadding(*padding, verbose);
4986 
4987  // If the source object is actually a CrsGraph, we can use view
4988  // mode instead of copy mode to access the entries in each row,
4989  // if the graph is not fill complete.
// Pointer cast: yields nullptr (no exception) when source is not a CrsGraph.
4990  const this_type* srcCrsGraph =
4991  dynamic_cast<const this_type*> (&source);
4992 
4993  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4994  const map_type& tgtRowMap = *(getRowMap());
4995  const bool src_filled = srcRowGraph.isFillComplete();
// Scratch buffer reused for every row read in copy mode.
4996  Teuchos::Array<GO> row_copy;
4997  LO myid = 0;
4998 
4999  //
5000  // "Copy" part of "copy and permute."
5001  //
5002  if (src_filled || srcCrsGraph == nullptr) {
5003  if (verbose) {
5004  std::ostringstream os;
5005  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5006  std::cerr << os.str ();
5007  }
5008  // If the source graph is fill complete, we can't use view mode,
5009  // because the data might be stored in a different format not
5010  // compatible with the expectations of view mode. Also, if the
5011  // source graph is not a CrsGraph, we can't use view mode,
5012  // because RowGraph only provides copy mode access to the data.
5013  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5014  const GO gid = srcRowMap.getGlobalElement (myid);
5015  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5016  row_copy.resize (row_length);
// check_row_length receives the copied count but is never compared
// against row_length.
5017  size_t check_row_length = 0;
5018  srcRowGraph.getGlobalRowCopy (gid, row_copy (), check_row_length);
5019  this->insertGlobalIndices (gid, row_copy ());
5020  }
5021  } else {
5022  if (verbose) {
5023  std::ostringstream os;
5024  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5025  std::cerr << os.str ();
5026  }
5027  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5028  const GO gid = srcRowMap.getGlobalElement (myid);
5029  Teuchos::ArrayView<const GO> row;
5030  srcCrsGraph->getGlobalRowView (gid, row);
5031  this->insertGlobalIndices (gid, row);
5032  }
5033  }
5034 
5035  //
5036  // "Permute" part of "copy and permute."
5037  //
5038  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5039  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5040 
// Same copy-versus-view choice as above, but each source row is inserted
// under the target row's global index.
5041  if (src_filled || srcCrsGraph == nullptr) {
5042  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5043  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5044  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5045  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5046  row_copy.resize (row_length);
5047  size_t check_row_length = 0;
5048  srcRowGraph.getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5049  this->insertGlobalIndices (mygid, row_copy ());
5050  }
5051  } else {
5052  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5053  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5054  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5055  Teuchos::ArrayView<const GO> row;
5056  srcCrsGraph->getGlobalRowView (srcgid, row);
5057  this->insertGlobalIndices (mygid, row);
5058  }
5059  }
5060 
5061  if (verbose) {
5062  std::ostringstream os;
5063  os << *prefix << "Done" << endl;
5064  std::cerr << os.str ();
5065  }
5066  }
5067 
// Pad this graph's column-index storage as described by `padding`.
//
// Steps, in order:
//   1. If no indices are allocated yet, allocate them as global indices.
//   2. Copy k_rowPtrs_ into a writable array of row-begin offsets.
//   3. Build a matching array of row-end offsets (from k_numRowEntries_
//      when that array is nonempty, otherwise from the next row's begin).
//   4. Call Details::padCrsArrays on the global or local index array.
//   5. If k_numRowEntries_ is nonempty, recompute it from the offsets.
//   6. Reassign k_rowPtrs_ to the row-begin offsets.
//
// \param padding [in] Padding description; forwarded to padCrsArrays
//   and, in verbose mode, printed.
// \param verbose [in] Whether to print debug output to std::cerr.
5068  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5069  void
5070  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5071  applyCrsPadding(const padding_type& padding,
5072  const bool verbose)
5073  {
5074  using Details::ProfilingRegion;
5075  using Details::padCrsArrays;
5076  using std::endl;
5077  using LO = local_ordinal_type;
5078  using execution_space = typename device_type::execution_space;
5079  using row_ptrs_type =
5080  typename local_graph_type::row_map_type::non_const_type;
5081  using range_policy =
5082  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5083  const char tfecfFuncName[] = "applyCrsPadding";
5084  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
5085 
5086  std::unique_ptr<std::string> prefix;
5087  if (verbose) {
5088  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5089  std::ostringstream os;
5090  os << *prefix << "padding: ";
5091  padding.print(os);
5092  os << endl;
5093  std::cerr << os.str();
5094  }
// The rank is only looked up in verbose mode; otherwise (or if the Map
// or its communicator is null) -1 is passed on to padCrsArrays.
5095  const int myRank = ! verbose ? -1 : [&] () {
5096  auto map = this->getMap();
5097  if (map.is_null()) {
5098  return -1;
5099  }
5100  auto comm = map->getComm();
5101  if (comm.is_null()) {
5102  return -1;
5103  }
5104  return comm->getRank();
5105  } ();
5106 
5107  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
5108  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
5109  // size needs to increase. That should be the job of
5110  // padCrsArrays.
5111 
5112  // Assume global indexing if we don't have any indices yet
5113  if (! indicesAreAllocated()) {
5114  if (verbose) {
5115  std::ostringstream os;
5116  os << *prefix << "Call allocateIndices" << endl;
5117  std::cerr << os.str();
5118  }
5119  allocateIndices(GlobalIndices, verbose);
5120  }
5121  TEUCHOS_ASSERT( indicesAreAllocated() );
5122 
5123  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5124  // would use it directly.
5125 
5126  if (verbose) {
5127  std::ostringstream os;
5128  os << *prefix << "Allocate row_ptrs_beg: "
5129  << k_rowPtrs_.extent(0) << endl;
5130  std::cerr << os.str();
5131  }
5132  using Kokkos::view_alloc;
5133  using Kokkos::WithoutInitializing;
// Allocated without initialization because the deep_copy below
// overwrites every entry.
5134  row_ptrs_type row_ptrs_beg(
5135  view_alloc("row_ptrs_beg", WithoutInitializing),
5136  k_rowPtrs_.extent(0));
5137  Kokkos::deep_copy(row_ptrs_beg, k_rowPtrs_);
5138 
// N is one less than the number of row offsets (zero if there are no
// offsets). It is the length of row_ptrs_end and the trip count of
// every parallel loop below.
5139  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
5140  size_t(row_ptrs_beg.extent(0) - 1);
5141  if (verbose) {
5142  std::ostringstream os;
5143  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
5144  std::cerr << os.str();
5145  }
// Also uninitialized; one of the two kernels below fills entries 0..N-1.
5146  row_ptrs_type row_ptrs_end(
5147  view_alloc("row_ptrs_end", WithoutInitializing), N);
5148 
// A nonempty k_numRowEntries_ selects how row ends are computed here and
// whether the counts are written back after padding.
5149  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5150  if (refill_num_row_entries) { // Case 1: Unpacked storage
5151  // We can't assume correct *this capture until C++17, and it's
5152  // likely more efficient just to capture what we need anyway.
5153  auto num_row_entries = this->k_numRowEntries_;
// End of row i = its begin offset plus its stored entry count.
5154  Kokkos::parallel_for
5155  ("Fill end row pointers", range_policy(0, N),
5156  KOKKOS_LAMBDA (const size_t i) {
5157  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5158  });
5159  }
5160  else {
5161  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5162  // storage, we don't need row_ptr_end to be separate allocation;
5163  // could just have it alias row_ptr_beg+1.
// End of row i = begin offset of row i+1.
5164  Kokkos::parallel_for
5165  ("Fill end row pointers", range_policy(0, N),
5166  KOKKOS_LAMBDA (const size_t i) {
5167  row_ptrs_end(i) = row_ptrs_beg(i+1);
5168  });
5169  }
5170 
// Pad whichever index array matches the graph's current indexing mode.
5171  if (isGloballyIndexed()) {
5172  padCrsArrays(row_ptrs_beg, row_ptrs_end, k_gblInds1D_,
5173  padding, myRank, verbose);
5174  }
5175  else {
5176  padCrsArrays(row_ptrs_beg, row_ptrs_end, k_lclInds1D_,
5177  padding, myRank, verbose);
5178  }
5179 
// Recompute the per-row entry counts from the begin/end offsets after
// padCrsArrays has run.
5180  if (refill_num_row_entries) {
5181  auto num_row_entries = this->k_numRowEntries_;
5182  Kokkos::parallel_for
5183  ("Fill num entries", range_policy(0, N),
5184  KOKKOS_LAMBDA (const size_t i) {
5185  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5186  });
5187  }
5188  if (verbose) {
5189  std::ostringstream os;
5190  os << *prefix << "Reassign k_rowPtrs_; old size: "
5191  << k_rowPtrs_.extent(0) << ", new size: "
5192  << row_ptrs_beg.extent(0) << endl;
5193  std::cerr << os.str();
// NOTE(review): this size check only runs when verbose is true.
5194  TEUCHOS_ASSERT( k_rowPtrs_.extent(0) == row_ptrs_beg.extent(0) );
5195  }
5196  this->k_rowPtrs_ = row_ptrs_beg;
5197  }
5198 
5199  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5200  std::unique_ptr<
5201  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5202  >
5203  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5204  computeCrsPadding(
5205  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5206  const size_t numSameIDs,
5207  const Kokkos::DualView<const local_ordinal_type*,
5208  buffer_device_type>& permuteToLIDs,
5209  const Kokkos::DualView<const local_ordinal_type*,
5210  buffer_device_type>& permuteFromLIDs,
5211  const bool verbose) const
5212  {
5213  using LO = local_ordinal_type;
5214  using std::endl;
5215 
5216  std::unique_ptr<std::string> prefix;
5217  if (verbose) {
5218  prefix = this->createPrefix("CrsGraph",
5219  "computeCrsPadding(same & permute)");
5220  std::ostringstream os;
5221  os << *prefix << "{numSameIDs: " << numSameIDs
5222  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5223  << endl;
5224  std::cerr << os.str();
5225  }
5226 
5227  const int myRank = [&] () {
5228  auto comm = rowMap_.is_null() ? Teuchos::null :
5229  rowMap_->getComm();
5230  return comm.is_null() ? -1 : comm->getRank();
5231  } ();
5232  std::unique_ptr<padding_type> padding(
5233  new padding_type(myRank, numSameIDs,
5234  permuteFromLIDs.extent(0)));
5235 
5236  // We're accessing data on host, so make sure all device
5237  // computations on the graphs' data are done.
5238  //
5239  // NOTE (mfh 08 Feb 2020) If we ever get rid of this fence, look
5240  // carefully in computeCrsPaddingFor{Same,Permuted}IDs to see if
5241  // we need a fence there.
5242  Kokkos::fence();
5243 
5244  computeCrsPaddingForSameIDs(*padding, source,
5245  static_cast<LO>(numSameIDs));
5246  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5247  permuteFromLIDs);
5248  return padding;
5249  }
5250 
5251  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5252  void
5253  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5254  computeCrsPaddingForSameIDs(
5255  padding_type& padding,
5256  const RowGraph<local_ordinal_type, global_ordinal_type,
5257  node_type>& source,
5258  const local_ordinal_type numSameIDs) const
5259  {
5260  using LO = local_ordinal_type;
5261  using GO = global_ordinal_type;
5262  using Details::Impl::getRowGraphGlobalRow;
5263  using std::endl;
5264  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5265 
5266  std::unique_ptr<std::string> prefix;
5267  const bool verbose = verbose_;
5268  if (verbose) {
5269  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5270  std::ostringstream os;
5271  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5272  std::cerr << os.str();
5273  }
5274 
5275  if (numSameIDs == 0) {
5276  return;
5277  }
5278 
5279  const map_type& srcRowMap = *(source.getRowMap());
5280  const map_type& tgtRowMap = *rowMap_;
5281  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5282  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5283  const bool src_is_unique =
5284  srcCrs == nullptr ? false : srcCrs->isMerged();
5285  const bool tgt_is_unique = this->isMerged();
5286 
5287  std::vector<GO> srcGblColIndsScratch;
5288  std::vector<GO> tgtGblColIndsScratch;
5289  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5290  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5291  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5292  auto srcGblColInds = getRowGraphGlobalRow(
5293  srcGblColIndsScratch, source, srcGblRowInd);
5294  auto tgtGblColInds = getRowGraphGlobalRow(
5295  tgtGblColIndsScratch, *this, tgtGblRowInd);
5296  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5297  tgtGblColInds.size(), tgt_is_unique,
5298  srcGblColInds.getRawPtr(),
5299  srcGblColInds.size(), src_is_unique);
5300  }
5301  if (verbose) {
5302  std::ostringstream os;
5303  os << *prefix << "Done" << endl;
5304  std::cerr << os.str();
5305  }
5306  }
5307 
5308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5309  void
5310  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5311  computeCrsPaddingForPermutedIDs(
5312  padding_type& padding,
5313  const RowGraph<local_ordinal_type, global_ordinal_type,
5314  node_type>& source,
5315  const Kokkos::DualView<const local_ordinal_type*,
5316  buffer_device_type>& permuteToLIDs,
5317  const Kokkos::DualView<const local_ordinal_type*,
5318  buffer_device_type>& permuteFromLIDs) const
5319  {
5320  using LO = local_ordinal_type;
5321  using GO = global_ordinal_type;
5322  using Details::Impl::getRowGraphGlobalRow;
5323  using std::endl;
5324  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5325 
5326  std::unique_ptr<std::string> prefix;
5327  const bool verbose = verbose_;
5328  if (verbose) {
5329  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5330  std::ostringstream os;
5331  os << *prefix << "permuteToLIDs.extent(0): "
5332  << permuteToLIDs.extent(0)
5333  << ", permuteFromLIDs.extent(0): "
5334  << permuteFromLIDs.extent(0) << endl;
5335  std::cerr << os.str();
5336  }
5337 
5338  if (permuteToLIDs.extent(0) == 0) {
5339  return;
5340  }
5341 
5342  const map_type& srcRowMap = *(source.getRowMap());
5343  const map_type& tgtRowMap = *rowMap_;
5344  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5345  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5346  const bool src_is_unique =
5347  srcCrs == nullptr ? false : srcCrs->isMerged();
5348  const bool tgt_is_unique = this->isMerged();
5349 
5350  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5351  auto permuteToLIDs_h = permuteToLIDs.view_host();
5352  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5353  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5354 
5355  std::vector<GO> srcGblColIndsScratch;
5356  std::vector<GO> tgtGblColIndsScratch;
5357  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5358  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5359  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5360  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5361  auto srcGblColInds = getRowGraphGlobalRow(
5362  srcGblColIndsScratch, source, srcGblRowInd);
5363  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5364  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5365  auto tgtGblColInds = getRowGraphGlobalRow(
5366  tgtGblColIndsScratch, *this, tgtGblRowInd);
5367  padding.update_permute(whichPermute, tgtLclRowInd,
5368  tgtGblColInds.getRawPtr(),
5369  tgtGblColInds.size(), tgt_is_unique,
5370  srcGblColInds.getRawPtr(),
5371  srcGblColInds.size(), src_is_unique);
5372  }
5373 
5374  if (verbose) {
5375  std::ostringstream os;
5376  os << *prefix << "Done" << endl;
5377  std::cerr << os.str();
5378  }
5379  }
5380 
5381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5382  std::unique_ptr<
5383  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5384  >
5385  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5386  computeCrsPaddingForImports(
5387  const Kokkos::DualView<const local_ordinal_type*,
5388  buffer_device_type>& importLIDs,
5389  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5390  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5391  const bool verbose) const
5392  {
5393  using Details::Impl::getRowGraphGlobalRow;
5394  using std::endl;
5395  using LO = local_ordinal_type;
5396  using GO = global_ordinal_type;
5397  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5398 
5399  std::unique_ptr<std::string> prefix;
5400  if (verbose) {
5401  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5402  std::ostringstream os;
5403  os << *prefix << "importLIDs.extent(0): "
5404  << importLIDs.extent(0)
5405  << ", imports.extent(0): "
5406  << imports.extent(0)
5407  << ", numPacketsPerLID.extent(0): "
5408  << numPacketsPerLID.extent(0) << endl;
5409  std::cerr << os.str();
5410  }
5411 
5412  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5413  const int myRank = [&] () {
5414  auto comm = rowMap_.is_null() ? Teuchos::null :
5415  rowMap_->getComm();
5416  return comm.is_null() ? -1 : comm->getRank();
5417  } ();
5418  std::unique_ptr<padding_type> padding(
5419  new padding_type(myRank, numImports));
5420  Kokkos::fence(); // Make sure device sees changes made by host
5421  if (imports.need_sync_host()) {
5422  imports.sync_host();
5423  }
5424  auto imports_h = imports.view_host();
5425  if (numPacketsPerLID.need_sync_host ()) {
5426  numPacketsPerLID.sync_host();
5427  }
5428  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5429 
5430  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5431  auto importLIDs_h = importLIDs.view_host();
5432 
5433  const map_type& tgtRowMap = *rowMap_;
5434  // Always merge source column indices, since isMerged() is
5435  // per-process state, and we don't know its value on other
5436  // processes that sent us data.
5437  constexpr bool src_is_unique = false;
5438  const bool tgt_is_unique = isMerged();
5439 
5440  std::vector<GO> tgtGblColIndsScratch;
5441  size_t offset = 0;
5442  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5443  // CrsGraph packs just global column indices, while CrsMatrix
5444  // packs bytes (first the number of entries in the row, then the
5445  // global column indices, then other stuff like the matrix
5446  // values in that row).
5447  const LO origSrcNumEnt =
5448  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5449  GO* const srcGblColInds = imports_h.data() + offset;
5450 
5451  const LO tgtLclRowInd = importLIDs_h[whichImport];
5452  const GO tgtGblRowInd =
5453  tgtRowMap.getGlobalElement(tgtLclRowInd);
5454  auto tgtGblColInds = getRowGraphGlobalRow(
5455  tgtGblColIndsScratch, *this, tgtGblRowInd);
5456  const size_t origTgtNumEnt(tgtGblColInds.size());
5457 
5458  padding->update_import(whichImport, tgtLclRowInd,
5459  tgtGblColInds.getRawPtr(),
5460  origTgtNumEnt, tgt_is_unique,
5461  srcGblColInds,
5462  origSrcNumEnt, src_is_unique);
5463  offset += origSrcNumEnt;
5464  }
5465 
5466  if (verbose) {
5467  std::ostringstream os;
5468  os << *prefix << "Done" << endl;
5469  std::cerr << os.str();
5470  }
5471  return padding;
5472  }
5473 
// Build the padding description for data received from other processes
// when the incoming buffer holds byte-packed rows (the CrsMatrix
// packing described in the loop comment below).
//
// For import k, numPacketsPerLID[k] gives the number of bytes that the
// row occupies in `imports`. From those bytes this function unpacks an
// LO entry count followed by that many GO column indices, and hands
// them, together with the current global column indices of target local
// row importLIDs[k], to padding->update_import. Rows with zero bytes
// are skipped. Incoming indices are always reported as not merged.
//
// \param importLIDs [in] Target local row indices; must not need a sync
//   to host.
// \param imports [in] Incoming packed bytes; sync'd to host here if
//   needed.
// \param numPacketsPerLID [in] Number of bytes per imported row; sync'd
//   to host here if needed. Must have at least importLIDs.extent(0)
//   entries (asserted).
// \param verbose [in] Whether to print debug output to std::cerr.
//
// \return The newly created padding description.
5474  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5475  std::unique_ptr<
5476  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5477  >
5478  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5479  computePaddingForCrsMatrixUnpack(
5480  const Kokkos::DualView<const local_ordinal_type*,
5481  buffer_device_type>& importLIDs,
5482  Kokkos::DualView<char*, buffer_device_type> imports,
5483  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5484  const bool verbose) const
5485  {
5486  using Details::Impl::getRowGraphGlobalRow;
5487  using Details::PackTraits;
5488  using std::endl;
5489  using LO = local_ordinal_type;
5490  using GO = global_ordinal_type;
5491  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5492 
5493  std::unique_ptr<std::string> prefix;
5494  if (verbose) {
5495  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5496  std::ostringstream os;
5497  os << *prefix << "importLIDs.extent(0): "
5498  << importLIDs.extent(0)
5499  << ", imports.extent(0): "
5500  << imports.extent(0)
5501  << ", numPacketsPerLID.extent(0): "
5502  << numPacketsPerLID.extent(0) << endl;
5503  std::cerr << os.str();
5504  }
// Per-import tracing needs both `verbose` and
// Details::Behavior::verbose("CrsPadding").
5505  const bool extraVerbose =
5506  verbose && Details::Behavior::verbose("CrsPadding");
5507 
5508  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5509  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
// Rank of the calling process, or -1 when it cannot be determined.
5510  const int myRank = [&] () {
5511  auto comm = rowMap_.is_null() ? Teuchos::null :
5512  rowMap_->getComm();
5513  return comm.is_null() ? -1 : comm->getRank();
5514  } ();
5515  std::unique_ptr<padding_type> padding(
5516  new padding_type(myRank, numImports));
5517  Kokkos::fence(); // Make sure host sees changes made by device
5518  if (imports.need_sync_host()) {
5519  imports.sync_host();
5520  }
5521  auto imports_h = imports.view_host();
5522  if (numPacketsPerLID.need_sync_host ()) {
5523  numPacketsPerLID.sync_host();
5524  }
5525  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5526 
5527  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5528  auto importLIDs_h = importLIDs.view_host();
5529 
5530  const map_type& tgtRowMap = *rowMap_;
5531  // Always merge source column indices, since isMerged() is
5532  // per-process state, and we don't know its value on other
5533  // processes that sent us data.
5534  constexpr bool src_is_unique = false;
5535  const bool tgt_is_unique = isMerged();
5536 
// Scratch buffers reused across imports.
5537  std::vector<GO> srcGblColIndsScratch;
5538  std::vector<GO> tgtGblColIndsScratch;
// Byte position in imports_h where the current row's packed data begins.
5539  size_t offset = 0;
5540  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5541  // CrsGraph packs just global column indices, while CrsMatrix
5542  // packs bytes (first the number of entries in the row, then the
5543  // global column indices, then other stuff like the matrix
5544  // values in that row).
5545  const size_t numBytes = numPacketsPerLID_h[whichImport];
5546  if (extraVerbose) {
5547  std::ostringstream os;
5548  os << *prefix << "whichImport=" << whichImport
5549  << ", numImports=" << numImports
5550  << ", numBytes=" << numBytes << endl;
5551  std::cerr << os.str();
5552  }
5553  if (numBytes == 0) {
5554  continue; // special case: no entries to unpack for this row
5555  }
// Unpack the row's entry count, which sits at the start of the row's
// bytes. numEntLen is the value PackTraits reports for one LO; it is
// used below as the byte length of that count.
5556  LO origSrcNumEnt = 0;
5557  const size_t numEntBeg = offset;
5558  const size_t numEntLen =
5559  PackTraits<LO>::packValueCount(origSrcNumEnt);
5560  TEUCHOS_ASSERT( numBytes >= numEntLen );
5561  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5562  PackTraits<LO>::unpackValue(origSrcNumEnt,
5563  imports_h.data() + numEntBeg);
5564  if (extraVerbose) {
5565  std::ostringstream os;
5566  os << *prefix << "whichImport=" << whichImport
5567  << ", numImports=" << numImports
5568  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5569  std::cerr << os.str();
5570  }
// The row's bytes must be able to hold the count plus origSrcNumEnt
// global indices.
5571  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5572  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
// The global column indices follow immediately after the count.
5573  const size_t gidsBeg = numEntBeg + numEntLen;
// The scratch buffer only ever grows.
5574  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5575  srcGblColIndsScratch.resize(origSrcNumEnt);
5576  }
5577  GO* const srcGblColInds = srcGblColIndsScratch.data();
5578  PackTraits<GO>::unpackArray(srcGblColInds,
5579  imports_h.data() + gidsBeg,
5580  origSrcNumEnt);
5581  const LO tgtLclRowInd = importLIDs_h[whichImport];
5582  const GO tgtGblRowInd =
5583  tgtRowMap.getGlobalElement(tgtLclRowInd);
5584  auto tgtGblColInds = getRowGraphGlobalRow(
5585  tgtGblColIndsScratch, *this, tgtGblRowInd);
5586  const size_t origNumTgtEnt(tgtGblColInds.size());
5587 
5588  if (extraVerbose) {
5589  std::ostringstream os;
5590  os << *prefix << "whichImport=" << whichImport
5591  << ", numImports=" << numImports
5592  << ": Call padding->update_import" << endl;
5593  std::cerr << os.str();
5594  }
5595  padding->update_import(whichImport, tgtLclRowInd,
5596  tgtGblColInds.getRawPtr(),
5597  origNumTgtEnt, tgt_is_unique,
5598  srcGblColInds,
5599  origSrcNumEnt, src_is_unique);
// Skip over all of this row's bytes, including anything after the
// column indices.
5600  offset += numBytes;
5601  }
5602 
5603  if (verbose) {
5604  std::ostringstream os;
5605  os << *prefix << "Done" << endl;
5606  std::cerr << os.str();
5607  }
5608  return padding;
5609  }
5610 
// Pack the rows of `source` listed in `exportLIDs` into `exports`.
//
// One of three packing paths is taken:
//   1. `source` is a RowGraph but not a CrsGraph: call RowGraph::pack
//      through Teuchos array types, then deep-copy the result into the
//      host view of `exports` (reallocating `exports` if its length
//      differs from the packed length).
//   2. The condition on the column Map / lclGraph_ below holds: call
//      packCrsGraphNew on the source CrsGraph.
//   3. Otherwise: call packFillActiveNew on the source CrsGraph.
//
// Debug output is controlled by the verbose_ member.
//
// \param source [in] Source object; must be a RowGraph with the same
//   template parameters, else std::invalid_argument is raised.
// \param exportLIDs [in] Local row indices to pack. Must have the same
//   length as numPacketsPerLID, else std::runtime_error is raised.
// \param exports [out] Packed data.
// \param numPacketsPerLID [out] Filled by the chosen packing routine.
// \param constantNumPackets [out] Forwarded to the chosen packing
//   routine.
// \param distor [in] Forwarded to the chosen packing routine.
//
// std::runtime_error is also raised if this (target) graph is fill
// complete.
5611  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5612  void
5613  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5614  packAndPrepare
5615  (const SrcDistObject& source,
5616  const Kokkos::DualView<const local_ordinal_type*,
5617  buffer_device_type>& exportLIDs,
5618  Kokkos::DualView<packet_type*,
5619  buffer_device_type>& exports,
5620  Kokkos::DualView<size_t*,
5621  buffer_device_type> numPacketsPerLID,
5622  size_t& constantNumPackets,
5623  Distributor& distor)
5624  {
// NOTE(review): ProfilingRegion is used below with no using-declaration
// visible in this listing; a line appears to be missing here. Confirm
// against the original source.
5626  using GO = global_ordinal_type;
5627  using std::endl;
5628  using crs_graph_type =
5629  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5630  using row_graph_type =
5631  RowGraph<local_ordinal_type, global_ordinal_type, node_type>;
5632  const char tfecfFuncName[] = "packAndPrepare: ";
5633  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5634 
5635  const bool verbose = verbose_;
5636  std::unique_ptr<std::string> prefix;
5637  if (verbose) {
5638  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5639  std::ostringstream os;
5640  os << *prefix << "Start" << endl;
5641  std::cerr << os.str();
5642  }
5643 
5644  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5646  std::runtime_error,
5647  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5648  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5649  << ".");
5650  const row_graph_type* srcRowGraphPtr =
5651  dynamic_cast<const row_graph_type*> (&source);
5652  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5653  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5654  "or Import operation to a CrsGraph must be a RowGraph with the same "
5655  "template parameters.");
5656  // We don't check whether src_graph has had fillComplete called,
5657  // because it doesn't matter whether the *source* graph has been
5658  // fillComplete'd. The target graph can not be fillComplete'd yet.
5659  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5660  (this->isFillComplete (), std::runtime_error,
5661  "The target graph of an Import or Export must not be fill complete.");
5662 
// A null result here means the source is a RowGraph that is not a
// CrsGraph, which selects the generic RowGraph::pack path below.
5663  const crs_graph_type* srcCrsGraphPtr =
5664  dynamic_cast<const crs_graph_type*> (&source);
5665 
5666  if (srcCrsGraphPtr == nullptr) {
5667  using Teuchos::ArrayView;
5668  using LO = local_ordinal_type;
5669 
5670  if (verbose) {
5671  std::ostringstream os;
5672  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5673  << endl;
5674  std::cerr << os.str();
5675  }
5676  // RowGraph::pack serves the "old" DistObject interface. It
5677  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5678  // entails deep-copying the exports buffer on output. RowGraph
5679  // is a convenience interface when not a CrsGraph, so we accept
5680  // the performance hit.
5681  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5682  auto exportLIDs_h = exportLIDs.view_host ();
5683  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5684  exportLIDs_h.extent (0));
5685  Teuchos::Array<GO> exports_a;
5686 
// numPacketsPerLID is written on host by pack() below, so reset its
// sync state and mark the host copy as modified.
5687  numPacketsPerLID.clear_sync_state ();
5688  numPacketsPerLID.modify_host ();
5689  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5690  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5691  numPacketsPerLID_h.extent (0));
5692  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5693  constantNumPackets, distor);
// Reallocate exports only if its length differs from the packed length.
5694  const size_t newSize = static_cast<size_t> (exports_a.size ());
5695  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5696  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5697  exports = exports_dv_type ("exports", newSize);
5698  }
// Wrap the Teuchos::Array storage in an unmanaged host View so that it
// can be the source of the deep_copy into exports' host view.
5699  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5700  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5701  exports.clear_sync_state ();
5702  exports.modify_host ();
5703  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5704  }
5705  // packCrsGraphNew requires a valid localGraph.
// NOTE(review): this condition inspects the column Map, lclGraph_ and
// row Map of *this (the target), while the graph handed to
// packCrsGraphNew is *srcCrsGraphPtr. Confirm that this is intended.
5706  else if (! getColMap ().is_null () &&
5707  (lclGraph_.row_map.extent (0) != 0 ||
5708  getRowMap ()->getNodeNumElements () == 0)) {
5709  if (verbose) {
5710  std::ostringstream os;
5711  os << *prefix << "packCrsGraphNew path" << endl;
5712  std::cerr << os.str();
5713  }
5714  using export_pids_type =
5715  Kokkos::DualView<const int*, buffer_device_type>;
5716  export_pids_type exportPIDs; // not filling it; needed for syntax
5717  using LO = local_ordinal_type;
5718  using NT = node_type;
// NOTE(review): packCrsGraphNew is called below with no using-declaration
// visible in this listing; a line appears to be missing here. Confirm
// against the original source.
5720  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5721  exports, numPacketsPerLID,
5722  constantNumPackets, false, distor);
5723  }
5724  else {
5725  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5726  constantNumPackets, distor);
5727  }
5728 
5729  if (verbose) {
5730  std::ostringstream os;
5731  os << *prefix << "Done" << endl;
5732  std::cerr << os.str();
5733  }
5734  }
5735 
// Pack the rows of this graph listed in `exportLIDs` into `exports`,
// using Teuchos array types.
//
// Calls packCrsGraph when the column Map is non-null and either
// lclGraph_.row_map is nonempty or the row Map has no elements on this
// process; otherwise calls packFillActive.
//
// \param exportLIDs [in] Local row indices to pack.
// \param exports [out] Packed data; forwarded to the chosen routine.
// \param numPacketsPerLID [out] Forwarded to the chosen routine.
// \param constantNumPackets [out] Forwarded to the chosen routine.
// \param distor [in] Forwarded to the chosen routine.
5736  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5737  void
// NOTE(review): every sibling definition has a
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier line at this
// point; it appears to be missing from this listing. Confirm against the
// original source.
5739  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5740  Teuchos::Array<GlobalOrdinal>& exports,
5741  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5742  size_t& constantNumPackets,
5743  Distributor& distor) const
5744  {
5745  auto col_map = this->getColMap();
5746  // packCrsGraph requires a valid localGraph.
5747  if( !col_map.is_null() && (lclGraph_.row_map.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
// NOTE(review): packCrsGraph is called below with no using-declaration
// visible in this listing; a line appears to be missing here. Confirm
// against the original source.
5749  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5750  exportLIDs, constantNumPackets, distor);
5751  }
5752  else {
5753  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5754  constantNumPackets, distor);
5755  }
5756  }
5757 
5758  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5759  void
5761  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5762  Teuchos::Array<GlobalOrdinal>& exports,
5763  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5764  size_t& constantNumPackets,
5765  Distributor& /* distor */) const
5766  {
5767  using std::endl;
5768  using LO = LocalOrdinal;
5769  using GO = GlobalOrdinal;
5770  using host_execution_space =
5771  typename Kokkos::View<size_t*, device_type>::
5772  HostMirror::execution_space;
5773  using device_execution_space =
5774  typename device_type::execution_space;
5775  const char tfecfFuncName[] = "packFillActive: ";
5776  const bool verbose = verbose_;
5777 
5778  const auto numExportLIDs = exportLIDs.size ();
5779  std::unique_ptr<std::string> prefix;
5780  if (verbose) {
5781  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5782  std::ostringstream os;
5783  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5784  std::cerr << os.str();
5785  }
5786  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5787  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5788  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5789  " = " << numPacketsPerLID.size () << ".");
5790 
5791  // We may be accessing UVM data on host below, so ensure that the
5792  // device is done accessing it.
5793  device_execution_space().fence ();
5794 
5795  const map_type& rowMap = * (this->getRowMap ());
5796  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5797  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5798  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5799  "This graph claims to be locally indexed, but its column Map is nullptr. "
5800  "This should never happen. Please report this bug to the Tpetra "
5801  "developers.");
5802 
5803  // We may pack different amounts of data for different rows.
5804  constantNumPackets = 0;
5805 
5806  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5807  // it might be now, but we might as well be safe).
5808  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5809  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5810 
5811  // Count the total number of packets (column indices, in the case
5812  // of a CrsGraph) to pack. While doing so, set
5813  // numPacketsPerLID[i] to the number of entries owned by the
5814  // calling process in (local) row exportLIDs[i] of the graph, that
5815  // the caller wants us to send out.
5816  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5817  size_t totalNumPackets = 0;
5818  size_t errCount = 0;
5819  // lambdas turn what they capture const, so we can't
5820  // atomic_add(&errCount,1). Instead, we need a View to modify.
5821  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5822  host_device_type;
5823  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5824  constexpr size_t ONE = 1;
5825 
5826  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5827  inputRange,
5828  [=] (const LO& i, size_t& curTotalNumPackets) {
5829  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5830  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5831  Kokkos::atomic_add (&errCountView(), ONE);
5832  numPacketsPerLID_raw[i] = 0;
5833  }
5834  else {
5835  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5836  numPacketsPerLID_raw[i] = numEnt;
5837  curTotalNumPackets += numEnt;
5838  }
5839  },
5840  totalNumPackets);
5841 
5842  if (verbose) {
5843  std::ostringstream os;
5844  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5845  std::cerr << os.str();
5846  }
5847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5848  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5849  "one or more errors! errCount = " << errCount
5850  << ", totalNumPackets = " << totalNumPackets << ".");
5851  errCount = 0;
5852 
5853  // Allocate space for all the column indices to pack.
5854  exports.resize (totalNumPackets);
5855 
5856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5857  (! this->supportsRowViews (), std::logic_error,
5858  "this->supportsRowViews() returns false; this should never happen. "
5859  "Please report this bug to the Tpetra developers.");
5860 
5861  // Loop again over the rows to export, and pack rows of indices
5862  // into the output buffer.
5863 
5864  if (verbose) {
5865  std::ostringstream os;
5866  os << *prefix << "Pack into exports" << endl;
5867  std::cerr << os.str();
5868  }
5869 
5870  // Teuchos::ArrayView may not be thread safe, or may not be
5871  // efficiently thread safe. Better to use the raw pointer.
5872  GO* const exports_raw = exports.getRawPtr ();
5873  errCount = 0;
5874  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5875  inputRange, [=, &prefix]
5876  (const LO i, size_t& exportsOffset, const bool final) {
5877  const size_t curOffset = exportsOffset;
5878  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5879  const RowInfo rowInfo =
5880  this->getRowInfoFromGlobalRowIndex (gblRow);
5881 
5882  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5883  if (rowInfo.localRow == TDO::invalid ()) {
5884  if (verbose) {
5885  std::ostringstream os;
5886  os << *prefix << ": INVALID rowInfo: i=" << i
5887  << ", lclRow=" << exportLIDs_raw[i] << endl;
5888  std::cerr << os.str();
5889  }
5890  Kokkos::atomic_add (&errCountView(), ONE);
5891  }
5892  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5893  if (verbose) {
5894  std::ostringstream os;
5895  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5896  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5897  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5898  << ") > totalNumPackets (= " << totalNumPackets << ")."
5899  << endl;
5900  std::cerr << os.str();
5901  }
5902  Kokkos::atomic_add (&errCountView(), ONE);
5903  }
5904  else {
5905  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5906  if (this->isLocallyIndexed ()) {
5907  const LO* lclColInds = nullptr;
5908  LO capacity = 0;
5909  const LO errCode =
5910  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
5911  if (errCode == 0) {
5912  if (final) {
5913  for (LO k = 0; k < numEnt; ++k) {
5914  const LO lclColInd = lclColInds[k];
5915  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5916  // Pack it, even if it's wrong. Let the receiving
5917  // process deal with it. Otherwise, we'll miss out
5918  // on any correct data.
5919  exports_raw[curOffset + k] = gblColInd;
5920  } // for each entry in the row
5921  } // final pass?
5922  exportsOffset = curOffset + numEnt;
5923  }
5924  else { // error in getting local row view
5925  Kokkos::atomic_add (&errCountView(), ONE);
5926  }
5927  }
5928  else if (this->isGloballyIndexed ()) {
5929  const GO* gblColInds = nullptr;
5930  LO capacity = 0;
5931  const LO errCode =
5932  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
5933  if (errCode == 0) {
5934  if (final) {
5935  for (LO k = 0; k < numEnt; ++k) {
5936  const GO gblColInd = gblColInds[k];
5937  // Pack it, even if it's wrong. Let the receiving
5938  // process deal with it. Otherwise, we'll miss out
5939  // on any correct data.
5940  exports_raw[curOffset + k] = gblColInd;
5941  } // for each entry in the row
5942  } // final pass?
5943  exportsOffset = curOffset + numEnt;
5944  }
5945  else { // error in getting global row view
5946  Kokkos::atomic_add (&errCountView(), ONE);
5947  }
5948  }
5949  // If neither globally nor locally indexed, then the graph
5950  // has no entries in this row (or indeed, in any row on this
5951  // process) to pack.
5952  }
5953  });
5954 
5955  // We may have accessed UVM data on host above, so ensure that the
5956  // device sees these changes.
5957  device_execution_space().fence ();
5958 
5959  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5960  (errCount != 0, std::logic_error, "Packing encountered "
5961  "one or more errors! errCount = " << errCount
5962  << ", totalNumPackets = " << totalNumPackets << ".");
5963 
5964  if (verbose) {
5965  std::ostringstream os;
5966  os << *prefix << "Done" << endl;
5967  std::cerr << os.str();
5968  }
5969  }
5970 
5971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5972  void
5973  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5974  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5975  buffer_device_type>& exportLIDs,
5976  Kokkos::DualView<packet_type*,
5977  buffer_device_type>& exports,
5978  Kokkos::DualView<size_t*,
5979  buffer_device_type> numPacketsPerLID,
5980  size_t& constantNumPackets,
5981  Distributor& distor) const
5982  {
5983  using std::endl;
5984  using LO = local_ordinal_type;
5985  using GO = global_ordinal_type;
5986  using host_execution_space = typename Kokkos::View<size_t*,
5987  device_type>::HostMirror::execution_space;
5988  using host_device_type =
5989  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5990  using device_execution_space = typename device_type::execution_space;
5991  using exports_dv_type =
5992  Kokkos::DualView<packet_type*, buffer_device_type>;
5993  const char tfecfFuncName[] = "packFillActiveNew: ";
5994  const bool verbose = verbose_;
5995 
5996  const auto numExportLIDs = exportLIDs.extent (0);
5997  std::unique_ptr<std::string> prefix;
5998  if (verbose) {
5999  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
6000  std::ostringstream os;
6001  os << *prefix << "numExportLIDs: " << numExportLIDs
6002  << ", numPacketsPerLID.extent(0): "
6003  << numPacketsPerLID.extent(0) << endl;
6004  std::cerr << os.str();
6005  }
6006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6007  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6008  "exportLIDs.extent(0) = " << numExportLIDs
6009  << " != numPacketsPerLID.extent(0) = "
6010  << numPacketsPerLID.extent (0) << ".");
6011  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6012  auto exportLIDs_h = exportLIDs.view_host ();
6013 
6014  // We may be accessing UVM data on host below, so ensure that the
6015  // device is done accessing it.
6016  device_execution_space().fence ();
6017 
6018  const map_type& rowMap = * (this->getRowMap ());
6019  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6021  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6022  "This graph claims to be locally indexed, but its column Map is nullptr. "
6023  "This should never happen. Please report this bug to the Tpetra "
6024  "developers.");
6025 
6026  // We may pack different amounts of data for different rows.
6027  constantNumPackets = 0;
6028 
6029  numPacketsPerLID.clear_sync_state ();
6030  numPacketsPerLID.modify_host ();
6031  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6032 
6033  // Count the total number of packets (column indices, in the case
6034  // of a CrsGraph) to pack. While doing so, set
6035  // numPacketsPerLID[i] to the number of entries owned by the
6036  // calling process in (local) row exportLIDs[i] of the graph, that
6037  // the caller wants us to send out.
6038  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6039  range_type inputRange (0, numExportLIDs);
6040  size_t totalNumPackets = 0;
6041  size_t errCount = 0;
6042  // lambdas turn what they capture const, so we can't
6043  // atomic_add(&errCount,1). Instead, we need a View to modify.
6044  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6045  constexpr size_t ONE = 1;
6046 
6047  if (verbose) {
6048  std::ostringstream os;
6049  os << *prefix << "Compute totalNumPackets" << endl;
6050  std::cerr << os.str ();
6051  }
6052 
6053  Kokkos::parallel_reduce
6054  ("Tpetra::CrsGraph::pack: totalNumPackets",
6055  inputRange,
6056  [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
6057  const LO lclRow = exportLIDs_h[i];
6058  const GO gblRow = rowMap.getGlobalElement (lclRow);
6059  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6060  if (verbose) {
6061  std::ostringstream os;
6062  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6063  << " not in row Map on this process" << endl;
6064  std::cerr << os.str();
6065  }
6066  Kokkos::atomic_add (&errCountView(), ONE);
6067  numPacketsPerLID_h(i) = 0;
6068  }
6069  else {
6070  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6071  numPacketsPerLID_h(i) = numEnt;
6072  curTotalNumPackets += numEnt;
6073  }
6074  },
6075  totalNumPackets);
6076 
6077  if (verbose) {
6078  std::ostringstream os;
6079  os << *prefix << "totalNumPackets: " << totalNumPackets
6080  << ", errCount: " << errCount << endl;
6081  std::cerr << os.str ();
6082  }
6083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6084  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6085  "one or more errors! totalNumPackets: " << totalNumPackets
6086  << ", errCount: " << errCount << ".");
6087 
6088  // Allocate space for all the column indices to pack.
6089  if (size_t(exports.extent (0)) < totalNumPackets) {
6090  // FIXME (mfh 09 Apr 2019) Create without initializing.
6091  exports = exports_dv_type ("exports", totalNumPackets);
6092  }
6093 
6094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6095  (! this->supportsRowViews (), std::logic_error,
6096  "this->supportsRowViews() returns false; this should never happen. "
6097  "Please report this bug to the Tpetra developers.");
6098 
6099  // Loop again over the rows to export, and pack rows of indices
6100  // into the output buffer.
6101 
6102  if (verbose) {
6103  std::ostringstream os;
6104  os << *prefix << "Pack into exports buffer" << endl;
6105  std::cerr << os.str();
6106  }
6107 
6108  exports.clear_sync_state ();
6109  exports.modify_host ();
6110  auto exports_h = exports.view_host ();
6111 
6112  // The graph may store its data in UVM memory, so make sure that
6113  // any device kernels are done modifying the graph's data before
6114  // reading the data.
6115  device_execution_space().fence ();
6116 
6117  errCount = 0;
6118  Kokkos::parallel_scan
6119  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6120  inputRange, [=, &prefix]
6121  (const LO i, size_t& exportsOffset, const bool final) {
6122  const size_t curOffset = exportsOffset;
6123  const LO lclRow = exportLIDs_h(i);
6124  const GO gblRow = rowMap.getGlobalElement (lclRow);
6125  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6126  if (verbose) {
6127  std::ostringstream os;
6128  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6129  << " not in row Map on this process" << endl;
6130  std::cerr << os.str();
6131  }
6132  Kokkos::atomic_add (&errCountView(), ONE);
6133  return;
6134  }
6135 
6136  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6137  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6138  if (verbose) {
6139  std::ostringstream os;
6140  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6141  << ", gblRow=" << gblRow << ": invalid rowInfo"
6142  << endl;
6143  std::cerr << os.str();
6144  }
6145  Kokkos::atomic_add (&errCountView(), ONE);
6146  return;
6147  }
6148 
6149  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6150  if (verbose) {
6151  std::ostringstream os;
6152  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6153  << ", gblRow=" << gblRow << ", curOffset (= "
6154  << curOffset << ") + numEnt (= " << rowInfo.numEntries
6155  << ") > totalNumPackets (= " << totalNumPackets
6156  << ")." << endl;
6157  std::cerr << os.str();
6158  }
6159  Kokkos::atomic_add (&errCountView(), ONE);
6160  return;
6161  }
6162 
6163  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6164  if (this->isLocallyIndexed ()) {
6165  const LO* lclColInds = nullptr;
6166  LO capacity = 0;
6167  const LO errCode =
6168  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6169  if (errCode == 0) {
6170  if (final) {
6171  for (LO k = 0; k < numEnt; ++k) {
6172  const LO lclColInd = lclColInds[k];
6173  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6174  // Pack it, even if it's wrong. Let the receiving
6175  // process deal with it. Otherwise, we'll miss out
6176  // on any correct data.
6177  exports_h(curOffset + k) = gblColInd;
6178  } // for each entry in the row
6179  } // final pass?
6180  exportsOffset = curOffset + numEnt;
6181  }
6182  else { // error in getting local row view
6183  if (verbose) {
6184  std::ostringstream os;
6185  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6186  << ", gblRow=" << gblRow << ": getLocalViewRawConst"
6187  "returned nonzero error code " << errCode << endl;
6188  std::cerr << os.str();
6189  }
6190  Kokkos::atomic_add (&errCountView(), ONE);
6191  }
6192  }
6193  else if (this->isGloballyIndexed ()) {
6194  const GO* gblColInds = nullptr;
6195  LO capacity = 0;
6196  const LO errCode =
6197  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6198  if (errCode == 0) {
6199  if (final) {
6200  for (LO k = 0; k < numEnt; ++k) {
6201  const GO gblColInd = gblColInds[k];
6202  // Pack it, even if it's wrong. Let the receiving
6203  // process deal with it. Otherwise, we'll miss out
6204  // on any correct data.
6205  exports_h(curOffset + k) = gblColInd;
6206  } // for each entry in the row
6207  } // final pass?
6208  exportsOffset = curOffset + numEnt;
6209  }
6210  else { // error in getting global row view
6211  if (verbose) {
6212  std::ostringstream os;
6213  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6214  << ", gblRow=" << gblRow << ": "
6215  "getGlobalViewRawConst returned nonzero error code "
6216  << errCode << endl;
6217  std::cerr << os.str();
6218  }
6219  Kokkos::atomic_add (&errCountView(), ONE);
6220  }
6221  }
6222  // If neither globally nor locally indexed, then the graph
6223  // has no entries in this row (or indeed, in any row on this
6224  // process) to pack.
6225  });
6226 
6227  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6228  // (errCount != 0, std::logic_error, "Packing encountered "
6229  // "one or more errors! errCount = " << errCount
6230  // << ", totalNumPackets = " << totalNumPackets << ".");
6231 
6232  if (verbose) {
6233  std::ostringstream os;
6234  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6235  std::cerr << os.str();
6236  }
6237  }
6238 
6239  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6240  void
6241  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6242  unpackAndCombine
6243  (const Kokkos::DualView<const local_ordinal_type*,
6244  buffer_device_type>& importLIDs,
6245  Kokkos::DualView<packet_type*,
6246  buffer_device_type> imports,
6247  Kokkos::DualView<size_t*,
6248  buffer_device_type> numPacketsPerLID,
6249  const size_t /* constantNumPackets */,
6250  Distributor& /* distor */,
6251  const CombineMode /* combineMode */ )
6252  {
6253  using Details::ProfilingRegion;
6254  using std::endl;
6255  using LO = local_ordinal_type;
6256  using GO = global_ordinal_type;
6257  const char tfecfFuncName[] = "unpackAndCombine";
6258 
6259  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6260  const bool verbose = verbose_;
6261 
6262  std::unique_ptr<std::string> prefix;
6263  if (verbose) {
6264  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6265  std::ostringstream os;
6266  os << *prefix << "Start" << endl;
6267  std::cerr << os.str ();
6268  }
6269  {
6270  auto padding = computeCrsPaddingForImports(
6271  importLIDs, imports, numPacketsPerLID, verbose);
6272  applyCrsPadding(*padding, verbose);
6273  if (verbose) {
6274  std::ostringstream os;
6275  os << *prefix << "Done computing & applying padding" << endl;
6276  std::cerr << os.str ();
6277  }
6278  }
6279 
6280  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6281  // reasonable meaning, whether or not the matrix is fill complete.
6282  // It's just more work to implement.
6283 
6284  // We are not checking the value of the CombineMode input
6285  // argument. For CrsGraph, we only support import/export
6286  // operations if fillComplete has not yet been called. Any
6287  // incoming column-indices are inserted into the target graph. In
6288  // this context, CombineMode values of ADD vs INSERT are
6289  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6290  // duplicate column-index is inserted, it will be compressed out
6291  // when fillComplete is called.
6292  //
6293  // Note: I think REPLACE means that an existing row is replaced by
6294  // the imported row, i.e., the existing indices are cleared. CGB,
6295  // 6/17/2010
6296 
6297  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6298  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6299  std::runtime_error, ": importLIDs.extent(0) = "
6300  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6301  << numPacketsPerLID.extent (0) << ".");
6302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6303  (isFillComplete (), std::runtime_error,
6304  ": Import or Export operations are not allowed on a target "
6305  "CrsGraph that is fillComplete.");
6306 
6307  const size_t numImportLIDs(importLIDs.extent(0));
6308  if (numPacketsPerLID.need_sync_host()) {
6309  numPacketsPerLID.sync_host();
6310  }
6311  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6312  if (imports.need_sync_host()) {
6313  imports.sync_host();
6314  }
6315  auto imports_h = imports.view_host();
6316  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6317  auto importLIDs_h = importLIDs.view_host();
6318 
6319  // If we're inserting in local indices, let's pre-allocate
6320  Teuchos::Array<LO> lclColInds;
6321  if (isLocallyIndexed()) {
6322  if (verbose) {
6323  std::ostringstream os;
6324  os << *prefix << "Preallocate local indices scratch" << endl;
6325  std::cerr << os.str();
6326  }
6327  size_t maxNumInserts = 0;
6328  for (size_t i = 0; i < numImportLIDs; ++i) {
6329  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6330  }
6331  if (verbose) {
6332  std::ostringstream os;
6333  os << *prefix << "Local indices scratch size: "
6334  << maxNumInserts << endl;
6335  std::cerr << os.str();
6336  }
6337  lclColInds.resize (maxNumInserts);
6338  }
6339  else {
6340  if (verbose) {
6341  std::ostringstream os;
6342  os << *prefix;
6343  if (isGloballyIndexed()) {
6344  os << "Graph is globally indexed";
6345  }
6346  else {
6347  os << "Graph is neither locally nor globally indexed";
6348  }
6349  os << endl;
6350  std::cerr << os.str();
6351  }
6352  }
6353 
6354  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6355  const map_type& rowMap = *rowMap_;
6356 
6357  try {
6358  size_t importsOffset = 0;
6359  for (size_t i = 0; i < numImportLIDs; ++i) {
6360  if (verbose) {
6361  std::ostringstream os;
6362  os << *prefix << "i=" << i << ", numImportLIDs="
6363  << numImportLIDs << endl;
6364  std::cerr << os.str();
6365  }
6366  // We can only unpack into owned rows, since we only have
6367  // local row indices.
6368  const LO lclRow = importLIDs_h[i];
6369  const GO gblRow = rowMap.getGlobalElement(lclRow);
6370  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6371  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6372  std::logic_error, "importLIDs[i=" << i << "]="
6373  << lclRow << " is not in the row Map on the calling "
6374  "process.");
6375  const LO numEnt = numPacketsPerLID_h[i];
6376  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6377  imports_h.data() + importsOffset;
6378  if (! isLocallyIndexed()) {
6379  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6380  }
6381  else {
6382  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6383  // column Map filtering.
6384  for (LO j = 0; j < numEnt; j++) {
6385  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6386  }
6387  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6388  }
6389  importsOffset += numEnt;
6390  }
6391  }
6392  catch (std::exception& e) {
6393  TEUCHOS_TEST_FOR_EXCEPTION
6394  (true, std::runtime_error,
6395  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6396  "exception: " << endl << e.what());
6397  }
6398 
6399  if (verbose) {
6400  std::ostringstream os;
6401  os << *prefix << "Done" << endl;
6402  std::cerr << os.str();
6403  }
6404  }
6405 
6406  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6407  void
6409  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6410  {
6411  using Teuchos::Comm;
6412  using Teuchos::null;
6413  using Teuchos::ParameterList;
6414  using Teuchos::RCP;
6415 
6416  // We'll set all the state "transactionally," so that this method
6417  // satisfies the strong exception guarantee. This object's state
6418  // won't be modified until the end of this method.
6419  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6420  RCP<import_type> importer;
6421  RCP<export_type> exporter;
6422 
6423  rowMap = newMap;
6424  RCP<const Comm<int> > newComm =
6425  (newMap.is_null ()) ? null : newMap->getComm ();
6426 
6427  if (! domainMap_.is_null ()) {
6428  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6429  // Common case: original domain and row Maps are identical.
6430  // In that case, we need only replace the original domain Map
6431  // with the new Map. This ensures that the new domain and row
6432  // Maps _stay_ identical.
6433  domainMap = newMap;
6434  } else {
6435  domainMap = domainMap_->replaceCommWithSubset (newComm);
6436  }
6437  }
6438  if (! rangeMap_.is_null ()) {
6439  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6440  // Common case: original range and row Maps are identical. In
6441  // that case, we need only replace the original range Map with
6442  // the new Map. This ensures that the new range and row Maps
6443  // _stay_ identical.
6444  rangeMap = newMap;
6445  } else {
6446  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6447  }
6448  }
6449  if (! colMap.is_null ()) {
6450  colMap = colMap_->replaceCommWithSubset (newComm);
6451  }
6452 
6453  // (Re)create the Export and / or Import if necessary.
6454  if (! newComm.is_null ()) {
6455  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6456  //
6457  // The operations below are collective on the new communicator.
6458  //
6459  // (Re)create the Export object if necessary. If I haven't
6460  // called fillComplete yet, I don't have a rangeMap, so I must
6461  // first check if the _original_ rangeMap is not null. Ditto
6462  // for the Import object and the domain Map.
6463  if (! rangeMap_.is_null () &&
6464  rangeMap != rowMap &&
6465  ! rangeMap->isSameAs (*rowMap)) {
6466  if (params.is_null () || ! params->isSublist ("Export")) {
6467  exporter = rcp (new export_type (rowMap, rangeMap));
6468  }
6469  else {
6470  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6471  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6472  }
6473  }
6474  // (Re)create the Import object if necessary.
6475  if (! domainMap_.is_null () &&
6476  domainMap != colMap &&
6477  ! domainMap->isSameAs (*colMap)) {
6478  if (params.is_null () || ! params->isSublist ("Import")) {
6479  importer = rcp (new import_type (domainMap, colMap));
6480  } else {
6481  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6482  importer = rcp (new import_type (domainMap, colMap, importSublist));
6483  }
6484  }
6485  } // if newComm is not null
6486 
6487  // Defer side effects until the end. If no destructors throw
6488  // exceptions (they shouldn't anyway), then this method satisfies
6489  // the strong exception guarantee.
6490  exporter_ = exporter;
6491  importer_ = importer;
6492  rowMap_ = rowMap;
6493  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6494  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6495  // the same object. We might want to get rid of this redundant
6496  // pointer sometime, but for now, we'll leave it alone and just
6497  // set map_ to the same object.
6498  this->map_ = rowMap;
6499  domainMap_ = domainMap;
6500  rangeMap_ = rangeMap;
6501  colMap_ = colMap;
6502  }
6503 
6504  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6505  void
6507  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6508  {
6509  using std::endl;
6510  using LO = LocalOrdinal;
6511  using GO = GlobalOrdinal;
6512  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6513  const bool verbose = verbose_;
6514 
6515  std::unique_ptr<std::string> prefix;
6516  if (verbose) {
6517  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6518  std::ostringstream os;
6519  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6520  << endl;
6521  std::cerr << os.str();
6522  }
6523 
6524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6525  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6526  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6528  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6529  std::invalid_argument, "offsets.extent(0) = " <<
6530  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6531 
6532  const map_type& rowMap = * (this->getRowMap ());
6533  const map_type& colMap = * (this->getColMap ());
6534 
6535  // We only use these in debug mode, but since debug mode is a
6536  // run-time option, they need to exist here. That's why we create
6537  // the vector with explicit size zero, to avoid overhead if debug
6538  // mode is off.
6539  bool allRowMapDiagEntriesInColMap = true;
6540  bool allDiagEntriesFound = true;
6541  bool allOffsetsCorrect = true;
6542  bool noOtherWeirdness = true;
6543  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6544  wrong_offsets_type wrongOffsets(0);
6545 
6546  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6547  // the subset of Map functionality that we need below.
6548  auto lclRowMap = rowMap.getLocalMap ();
6549  auto lclColMap = colMap.getLocalMap ();
6550 
6551  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6552  // setup, at least on the host. For CUDA, we have to use LocalMap
6553  // (that comes from each of the two Maps).
6554 
6555  const bool sorted = this->isSorted ();
6556  if (isFillComplete ()) {
6557  auto lclGraph = this->getLocalGraph ();
6558  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6559  lclGraph.row_map,
6560  lclGraph.entries, sorted);
6561  }
6562  else {
6563  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6564  // since the graph is not fill complete. The previous version
6565  // of this code assumed UVM; this version does not.
6566  auto offsets_h = Kokkos::create_mirror_view (offsets);
6567 
6568  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6569  // Find the diagonal entry. Since the row Map and column Map
6570  // may differ, we have to compare global row and column
6571  // indices, not local.
6572  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6573  const GO gblColInd = gblRowInd;
6574  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6575 
6576  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6577  allRowMapDiagEntriesInColMap = false;
6578  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6579  }
6580  else {
6581  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6582  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6583  rowInfo.numEntries > 0) {
6584 
6585  auto colInds = this->getLocalKokkosRowView (rowInfo);
6586  const size_t hint = 0; // not needed for this algorithm
6587  const size_t offset =
6588  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6589  lclColInd, hint, sorted);
6590  offsets_h(lclRowInd) = offset;
6591 
6592  if (debug_) {
6593  // Now that we have what we think is an offset, make sure
6594  // that it really does point to the diagonal entry. Offsets
6595  // are _relative_ to each row, not absolute (for the whole
6596  // (local) graph).
6597  Teuchos::ArrayView<const LO> lclColInds;
6598  try {
6599  this->getLocalRowView (lclRowInd, lclColInds);
6600  }
6601  catch (...) {
6602  noOtherWeirdness = false;
6603  }
6604  // Don't continue with error checking if the above failed.
6605  if (noOtherWeirdness) {
6606  const size_t numEnt = lclColInds.size ();
6607  if (offset >= numEnt) {
6608  // Offsets are relative to each row, so this means that
6609  // the offset is out of bounds.
6610  allOffsetsCorrect = false;
6611  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6612  } else {
6613  const LO actualLclColInd = lclColInds[offset];
6614  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6615  if (actualGblColInd != gblColInd) {
6616  allOffsetsCorrect = false;
6617  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6618  }
6619  }
6620  }
6621  } // debug_
6622  }
6623  else { // either row is empty, or something went wrong w/ getRowInfo()
6624  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6625  allDiagEntriesFound = false;
6626  }
6627  } // whether lclColInd is a valid local column index
6628  } // for each local row
6629 
6630  Kokkos::deep_copy (offsets, offsets_h);
6631  } // whether the graph is fill complete
6632 
6633  if (verbose && wrongOffsets.size () != 0) {
6634  std::ostringstream os;
6635  os << *prefix << "Wrong offsets: [";
6636  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6637  os << "(" << wrongOffsets[k].first << ","
6638  << wrongOffsets[k].second << ")";
6639  if (k + 1 < wrongOffsets.size ()) {
6640  os << ", ";
6641  }
6642  }
6643  os << "]" << endl;
6644  std::cerr << os.str();
6645  }
6646 
6647  if (debug_) {
6648  using Teuchos::reduceAll;
6649  using std::endl;
6650  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6651  const bool localSuccess =
6652  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6653  const int numResults = 5;
6654  int lclResults[5];
6655  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6656  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6657  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6658  lclResults[3] = noOtherWeirdness ? 1 : 0;
6659  // min-all-reduce will compute least rank of all the processes
6660  // that didn't succeed.
6661  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6662 
6663  int gblResults[5];
6664  gblResults[0] = 0;
6665  gblResults[1] = 0;
6666  gblResults[2] = 0;
6667  gblResults[3] = 0;
6668  gblResults[4] = 0;
6669  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6670  numResults, lclResults, gblResults);
6671 
6672  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6673  || gblResults[3] != 1) {
6674  std::ostringstream os; // build error message
6675  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6676  "possibly among others): " << endl;
6677  if (gblResults[0] == 0) {
6678  os << " - The column Map does not contain at least one diagonal entry "
6679  "of the graph." << endl;
6680  }
6681  if (gblResults[1] == 0) {
6682  os << " - On one or more processes, some row does not contain a "
6683  "diagonal entry." << endl;
6684  }
6685  if (gblResults[2] == 0) {
6686  os << " - On one or more processes, some offsets are incorrect."
6687  << endl;
6688  }
6689  if (gblResults[3] == 0) {
6690  os << " - One or more processes had some other error."
6691  << endl;
6692  }
6693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6694  }
6695  } // debug_
6696  }
6697 
6698  namespace { // (anonymous)
6699 
6700  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6701  // below). The point is to avoid the deep copy between the input
6702  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6703  // can't use UVM to avoid the deep copy with CUDA, because the
6704  // ArrayRCP is a host pointer, while the input to the graph's
6705  // getLocalDiagOffsets method is a device pointer. Assigning a
6706  // host pointer to a device pointer is incorrect unless the host
6707  // pointer points to host pinned memory. The goal is to get rid
6708  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6709  // copy for backwards compatibility.
6710  //
6711  // We have to use template magic because
6712  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6713  // if device_type::memory_space is not Kokkos::HostSpace (as is
6714  // the case with CUDA).
6715 
6716  template<class DeviceType,
6717  const bool memSpaceIsHostSpace =
6718  std::is_same<typename DeviceType::memory_space,
6719  Kokkos::HostSpace>::value>
6720  struct HelpGetLocalDiagOffsets {};
6721 
6722  template<class DeviceType>
6723  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6724  typedef DeviceType device_type;
6725  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6726  Kokkos::MemoryUnmanaged> device_offsets_type;
6727  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6728  Kokkos::MemoryUnmanaged> host_offsets_type;
6729 
6730  static device_offsets_type
6731  getDeviceOffsets (const host_offsets_type& hostOffsets)
6732  {
6733  // Host and device are the same; no need to allocate a
6734  // temporary device View.
6735  return hostOffsets;
6736  }
6737 
6738  static void
6739  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6740  const device_offsets_type& /* deviceOffsets */)
6741  { /* copy back not needed; host and device are the same */ }
6742  };
6743 
6744  template<class DeviceType>
6745  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6746  typedef DeviceType device_type;
6747  // We have to do a deep copy, since host memory space != device
6748  // memory space. Thus, the device View is managed (we need to
6749  // allocate a temporary device View).
6750  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6751  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6752  Kokkos::MemoryUnmanaged> host_offsets_type;
6753 
6754  static device_offsets_type
6755  getDeviceOffsets (const host_offsets_type& hostOffsets)
6756  {
6757  // Host memory space != device memory space, so we must
6758  // allocate a temporary device View for the graph.
6759  return device_offsets_type ("offsets", hostOffsets.extent (0));
6760  }
6761 
6762  static void
6763  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6764  const device_offsets_type& deviceOffsets)
6765  {
6766  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6767  }
6768  };
6769  } // namespace (anonymous)
6770 
6771 
// Host-array (Teuchos::ArrayRCP) overload of getLocalDiagOffsets.
//
// Resizes `offsets` to the local number of rows if its size differs,
// wraps it in an unmanaged host View, and passes a View obtained from
// HelpGetLocalDiagOffsets<device_type> to the overload that takes a
// device View.  The helper then copies the result back into `offsets`
// when its device View is not the host View itself.
//
// Raises std::runtime_error (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if the graph has no column Map.
//
// NOTE(review): the class-qualifier line of this definition (listing
// line 6774) is absent from this listing.
6772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6773  void
6775  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6776  {
6777  typedef LocalOrdinal LO;
6778  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6780  (! this->hasColMap (), std::runtime_error,
6781  "The graph does not yet have a column Map.");
6782  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
6783  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6784  // NOTE (mfh 21 Jan 2016) This means that the method does not
6785  // satisfy the strong exception guarantee (no side effects
6786  // unless successful): the caller's array is resized before the
6787  // offsets have been computed.
6787  offsets.resize (myNumRows);
6788  }
6789 
6790  // mfh 21 Jan 2016: This method unfortunately takes a
6791  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6792  // device pointer. We can't access host memory from the device;
6793  // that's the wrong direction for UVM. (It's the right direction
6794  // for inefficient host pinned memory, but we don't want to use
6795  // that here.) Thus, if device memory space != host memory space,
6796  // we allocate and use a temporary device View to get the offsets.
6797  // If the two spaces are equal, the template magic makes the deep
6798  // copy go away.
6799  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6800  typedef typename helper_type::host_offsets_type host_offsets_type;
6801  // Unmanaged host View that views the output array.
6802  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6803  // Allocate temp device View if host != device, else reuse host array.
6804  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6805  // NOT recursion; this calls the overload that takes a device View.
6806  this->getLocalDiagOffsets (deviceOffsets);
      // No-op when deviceOffsets already is the host View; otherwise a
      // deep copy from the temporary device View into the output array.
6807  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6808  }
6809 
// NOTE(review): the lines that name this bool-returning definition and
// open its body (listing lines 6812-6813) are absent from this listing.
// The only visible statement unconditionally returns true.
6810  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6811  bool
6814  return true;
6815  }
6816 
// Redistribute this graph into destGraph according to rowTransfer and
// then call expertStaticFillComplete on the result.  (The error-message
// prefix below names this routine
// Tpetra::CrsGraph::transferAndFillComplete.)
//
// Parameters visible in this listing:
//   rowTransfer     Import or Export describing the row redistribution;
//                   any other Transfer subclass raises
//                   std::invalid_argument.
//   domainTransfer  May be null.  If nonnull it must be an Import or an
//                   Export, of the same kind as rowTransfer.
//   domainMap       Domain Map of the result; null selects
//                   getDomainMap().
//   rangeMap        Range Map of the result; null selects getRangeMap().
//   params          May be null.  Entries read: "Reverse Mode",
//                   "Restrict Communicator", "compute global constants",
//                   the "CrsGraph" sublist, and (only with
//                   HAVE_TPETRA_MMM_TIMINGS) "Timer Label".
//
// destGraph is used throughout the body: if it is null a new graph is
// created on the destination row Map (rowTransfer's target Map, or its
// source Map in reverse mode); otherwise it must be neither locally nor
// globally indexed and must pass the Map and checkSizes() tests below.
//
// NOTE(review): this listing skips lines 6819-6820 (the qualified
// function name and the destGraph parameter) and 6827-6830 (right after
// the opening brace), so those declarations cannot be documented here.
6817  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6818  void
6821  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6822  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6823  const Teuchos::RCP<const map_type>& domainMap,
6824  const Teuchos::RCP<const map_type>& rangeMap,
6825  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6826  {
6831  using Teuchos::ArrayRCP;
6832  using Teuchos::ArrayView;
6833  using Teuchos::Comm;
6834  using Teuchos::ParameterList;
6835  using Teuchos::rcp;
6836  using Teuchos::RCP;
6837 #ifdef HAVE_TPETRA_MMM_TIMINGS
6838  using std::string;
6839  using Teuchos::TimeMonitor;
6840 #endif
6841 
6842  using LO = LocalOrdinal;
6843  using GO = GlobalOrdinal;
6844  using NT = node_type;
6845  using this_type = CrsGraph<LO, GO, NT>;
6846  using ivector_type = Vector<int, LO, GO, NT>;
6847  using packet_type = typename this_type::packet_type;
6848 
      // Prepended to every exception message raised in this routine.
6849  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6850 
6851 #ifdef HAVE_TPETRA_MMM_TIMINGS
6852  string label;
6853  if(!params.is_null()) label = params->get("Timer Label", label);
6854  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6855  RCP<TimeMonitor> MM =
6856  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6857 #endif
6858 
6859  // Make sure that the input argument rowTransfer is either an
6860  // Import or an Export. Import and Export are the only two
6861  // subclasses of Transfer that we defined, but users might
6862  // (unwisely, for now at least) decide to implement their own
6863  // subclasses. Exclude this possibility.
6864  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6865  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6866  TEUCHOS_TEST_FOR_EXCEPTION(
6867  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6868  prefix << "The 'rowTransfer' input argument must be either an Import or "
6869  "an Export, and its template parameters must match the corresponding "
6870  "template parameters of the CrsGraph.");
6871 
6872  // Make sure that the input argument domainTransfer is either an
6873  // Import or an Export. Import and Export are the only two
6874  // subclasses of Transfer that we defined, but users might
6875  // (unwisely, for now at least) decide to implement their own
6876  // subclasses. Exclude this possibility.
6877  Teuchos::RCP<const import_type> xferDomainAsImport =
6878  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6879  Teuchos::RCP<const export_type> xferDomainAsExport =
6880  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6881 
6882  if(! domainTransfer.is_null()) {
6883 
6884  TEUCHOS_TEST_FOR_EXCEPTION(
6885  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6886  prefix << "The 'domainTransfer' input argument must be either an "
6887  "Import or an Export, and its template parameters must match the "
6888  "corresponding template parameters of the CrsGraph.");
6889 
      // Each of the next two conditions has the form
      // (A || B) && ((A && !B) || (!A && B)), which is true exactly
      // when one of A, B holds but not the other: the first fires if
      // only one of the two transfers is an Import, the second if only
      // one of them is an Export.
6890  TEUCHOS_TEST_FOR_EXCEPTION(
6891  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6892  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6893  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6894  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6895  "must be of the same type (either Import or Export).");
6896 
6897  TEUCHOS_TEST_FOR_EXCEPTION(
6898  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6899  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6900  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6901  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6902  "must be of the same type (either Import or Export).");
6903 
6904  } // domainTransfer != null
6905 
6906 
6907  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6908  // if the source Map is not distributed but the target Map is?
6909  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6910 
6911  //
6912  // Get the caller's parameters
6913  //
6914 
6915  bool reverseMode = false; // Are we in reverse mode?
6916  bool restrictComm = false; // Do we need to restrict the communicator?
6917  RCP<ParameterList> graphparams; // parameters for the destination graph
6918  if (! params.is_null()) {
6919  reverseMode = params->get("Reverse Mode", reverseMode);
6920  restrictComm = params->get("Restrict Communicator", restrictComm);
6921  graphparams = sublist(params, "CrsGraph");
6922  }
6923 
6924  // Get the new domain and range Maps. We need some of them for error
6925  // checking, now that we have the reverseMode parameter.
6926  RCP<const map_type> MyRowMap = reverseMode ?
6927  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6928  RCP<const map_type> MyColMap; // create this below
6929  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6930  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
      // The "Base" Maps keep the unrestricted row and domain Maps; the
      // "My" Maps may be replaced below when restrictComm is set.
6931  RCP<const map_type> BaseRowMap = MyRowMap;
6932  RCP<const map_type> BaseDomainMap = MyDomainMap;
6933 
6934  // If the user gave us a nonnull destGraph, then check whether it's
6935  // "pristine." That means that it has no entries.
6936  //
6937  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6938  // then this exception test may hang. It would be better to
6939  // forward an error flag to the next communication phase.
6940  if (! destGraph.is_null()) {
6941  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6942  // whether a graph or matrix has no entries on the calling
6943  // process, is that it is neither locally nor globally indexed.
6944  // This may change eventually with the Kokkos refactor version
6945  // of Tpetra, so it would be better just to check the quantity
6946  // of interest directly. Note that with the Kokkos refactor
6947  // version of Tpetra, asking for the total number of entries in
6948  // a graph or matrix that is not fill complete might require
6949  // computation (kernel launch), since it is not thread scalable
6950  // to update a count every time an entry is inserted.
6951  const bool NewFlag =
6952  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6953  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6954  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6955  "if its graph is empty (neither locally nor globally indexed).");
6956 
6957  // FIXME (mfh 15 May 2014) At some point, we want to change
6958  // graphs and matrices so that their DistObject Map
6959  // (this->getMap()) may differ from their row Map. This will
6960  // make redistribution for 2-D distributions more efficient. I
6961  // hesitate to change this check, because I'm not sure how much
6962  // the code here depends on getMap() and getRowMap() being the
6963  // same.
6964  TEUCHOS_TEST_FOR_EXCEPTION(
6965  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6966  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6967  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6968 
      // NOTE(review): the message below contains a duplicated word
      // ("legal legal"); fixing it means changing the runtime string.
6969  TEUCHOS_TEST_FOR_EXCEPTION(
6970  ! destGraph->checkSizes(*this), std::invalid_argument,
6971  prefix << "You provided a nonnull destination graph, but checkSizes() "
6972  "indicates that it is not a legal legal target for redistribution from "
6973  "the source graph (*this). This may mean that they do not have the "
6974  "same dimensions.");
6975  }
6976 
6977  // If forward mode (the default), then *this's (row) Map must be
6978  // the same as the source Map of the Transfer. If reverse mode,
6979  // then *this's (row) Map must be the same as the target Map of
6980  // the Transfer.
6981  //
6982  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6983  // and matrices so that their DistObject Map (this->getMap()) may
6984  // differ from their row Map. This will make redistribution for
6985  // 2-D distributions more efficient. I hesitate to change this
6986  // check, because I'm not sure how much the code here depends on
6987  // getMap() and getRowMap() being the same.
6988  TEUCHOS_TEST_FOR_EXCEPTION(
6989  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6990  std::invalid_argument, prefix <<
6991  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6992 
6993  TEUCHOS_TEST_FOR_EXCEPTION(
6994  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6995  std::invalid_argument, prefix <<
6996  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6997 
6998  // checks for domainTransfer
      // NOTE(review): both checks dereference the domainMap argument
      // itself (not MyDomainMap), so a nonnull domainTransfer appears
      // to require a nonnull domainMap -- confirm with callers.
6999  TEUCHOS_TEST_FOR_EXCEPTION(
7000  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7001  std::invalid_argument,
7002  prefix << "The target map of the 'domainTransfer' input argument must be "
7003  "the same as the rebalanced domain map 'domainMap'");
7004 
7005  TEUCHOS_TEST_FOR_EXCEPTION(
7006  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7007  std::invalid_argument,
7008  prefix << "The source map of the 'domainTransfer' input argument must be "
7009  "the same as the rebalanced domain map 'domainMap'");
7010 
7011  // The basic algorithm here is:
7012  //
7013  // 1. Call the moral equivalent of "distor.do" to handle the import.
7014  // 2. Copy all the Imported and Copy/Permuted data into the raw
7015  // CrsGraph pointers, still using GIDs.
7016  // 3. Call an optimized version of MakeColMap that avoids the
7017  // Directory lookups (since the importer knows who owns all the
7018  // GIDs) AND reindexes to LIDs.
7019  // 4. Call expertStaticFillComplete()
7020 
7021  // Get information from the Importer
      // In reverse mode the roles of the export/remote and the
      // permute-to/permute-from lists are swapped.
7022  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7023  ArrayView<const LO> ExportLIDs = reverseMode ?
7024  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7025  ArrayView<const LO> RemoteLIDs = reverseMode ?
7026  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7027  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7028  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7029  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7030  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7031  Distributor& Distor = rowTransfer.getDistributor();
7032 
7033  // Owning PIDs
7034  Teuchos::Array<int> SourcePids;
7035  Teuchos::Array<int> TargetPids;
7036  int MyPID = getComm()->getRank();
7037 
7038  // Temp variables for sub-communicators
7039  RCP<const map_type> ReducedRowMap, ReducedColMap,
7040  ReducedDomainMap, ReducedRangeMap;
7041  RCP<const Comm<int> > ReducedComm;
7042 
7043  // If the user gave us a null destGraph, then construct the new
7044  // destination graph. We will replace its column Map later.
7045  if (destGraph.is_null()) {
7046  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7047  }
7048 
7049  /***************************************************/
7050  /***** 1) First communicator restriction phase ****/
7051  /***************************************************/
7052  if (restrictComm) {
7053  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7054  ReducedComm = ReducedRowMap.is_null() ?
7055  Teuchos::null :
7056  ReducedRowMap->getComm();
7057  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7058 
      // Reuse the reduced row Map when the domain (range) Map is the
      // very same object as the row Map (pointer comparison).
7059  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7060  ReducedRowMap :
7061  MyDomainMap->replaceCommWithSubset(ReducedComm);
7062  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7063  ReducedRowMap :
7064  MyRangeMap->replaceCommWithSubset(ReducedComm);
7065 
7066  // Reset the "my" maps
7067  MyRowMap = ReducedRowMap;
7068  MyDomainMap = ReducedDomainMap;
7069  MyRangeMap = ReducedRangeMap;
7070 
7071  // Update my PID, if we've restricted the communicator
7072  if (! ReducedComm.is_null()) {
7073  MyPID = ReducedComm->getRank();
7074  }
7075  else {
7076  MyPID = -2; // For debugging
7077  }
7078  }
7079  else {
7080  ReducedComm = MyRowMap->getComm();
7081  }
7082 
7083  /***************************************************/
7084  /***** 2) From Tpetra::DistObject::doTransfer() ****/
7085  /***************************************************/
7086 #ifdef HAVE_TPETRA_MMM_TIMINGS
7087  MM = Teuchos::null;
7088  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7089 #endif
7090  // Get the owning PIDs
7091  RCP<const import_type> MyImporter = getImporter();
7092 
7093  // check whether the source graph's domain Map and the base domain Map are the same
7094  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7095 
      // The branches below fill SourcePids with one entry per local
      // element of this graph's column Map.
7096  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7097  // Same domain map as source graph
7098  //
7099  // NOTE: This won't work for restrictComm (because the Import
7100  // doesn't know the restricted PIDs), though writing an
7101  // optimized version for that case would be easy (Import an
7102  // IntVector of the new PIDs). Might want to add this later.
7103  Import_Util::getPids(*MyImporter, SourcePids, false);
7104  }
7105  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7106  // Same domain map as source graph (restricted communicator)
7107  // We need one import from the domain to the column map
7108  ivector_type SourceDomain_pids(getDomainMap(),true);
7109  ivector_type SourceCol_pids(getColMap());
7110  // SourceDomain_pids contains the restricted pids
7111  SourceDomain_pids.putScalar(MyPID);
7112 
7113  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7114  SourcePids.resize(getColMap()->getNodeNumElements());
7115  SourceCol_pids.get1dCopy(SourcePids());
7116  }
7117  else if (MyImporter.is_null() && bSameDomainMap) {
7118  // Graph has no off-process entries
      // NOTE(review): assign(n, value) presumably sets the size
      // itself, which would make the preceding resize redundant --
      // confirm against Teuchos::Array.
7119  SourcePids.resize(getColMap()->getNodeNumElements());
7120  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7121  }
7122  else if ( ! MyImporter.is_null() &&
7123  ! domainTransfer.is_null() ) {
7124  // general implementation for rectangular matrices with
7125  // domain map different than SourceGraph domain map.
7126  // User has to provide a DomainTransfer object. We need
7127  // two communications (import/export)
7128 
7129  // TargetDomain_pids lives on the rebalanced new domain map
7130  ivector_type TargetDomain_pids(domainMap);
7131  TargetDomain_pids.putScalar(MyPID);
7132 
7133  // SourceDomain_pids lives on the non-rebalanced old domain map
7134  ivector_type SourceDomain_pids(getDomainMap());
7135 
7136  // SourceCol_pids lives on the non-rebalanced old column map
7137  ivector_type SourceCol_pids(getColMap());
7138 
7139  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7140  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7141  }
7142  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7143  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7144  }
7145  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7146  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7147  }
7148  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7149  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7150  }
7151  else {
7152  TEUCHOS_TEST_FOR_EXCEPTION(
7153  true, std::logic_error,
7154  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7155  }
7156  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7157  SourcePids.resize(getColMap()->getNodeNumElements());
7158  SourceCol_pids.get1dCopy(SourcePids());
7159  }
7160  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7161  getDomainMap()->isSameAs(*getRowMap())) {
7162  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7163  ivector_type TargetRow_pids(domainMap);
7164  ivector_type SourceRow_pids(getRowMap());
7165  ivector_type SourceCol_pids(getColMap());
7166 
7167  TargetRow_pids.putScalar(MyPID);
7168  if (! reverseMode && xferAsImport != nullptr) {
7169  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7170  }
7171  else if (reverseMode && xferAsExport != nullptr) {
7172  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7173  }
7174  else if (! reverseMode && xferAsExport != nullptr) {
7175  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7176  }
7177  else if (reverseMode && xferAsImport != nullptr) {
7178  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7179  }
7180  else {
7181  TEUCHOS_TEST_FOR_EXCEPTION(
7182  true, std::logic_error,
7183  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7184  }
7185  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7186  SourcePids.resize(getColMap()->getNodeNumElements());
7187  SourceCol_pids.get1dCopy(SourcePids());
7188  }
7189  else {
7190  TEUCHOS_TEST_FOR_EXCEPTION(
7191  true, std::invalid_argument,
7192  prefix << "This method only allows either domainMap == getDomainMap(), "
7193  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7194  }
7195 
7196  // Tpetra-specific stuff
      // A result of 0 is treated below as "variable number of packets
      // per LID".
7197  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7198  if (constantNumPackets == 0) {
7199  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7200  RemoteLIDs.size());
7201  }
7202  else {
7203  // There are a constant number of packets per element. We
7204  // already know (from the number of "remote" (incoming)
7205  // elements) how many incoming elements we expect, so we can
7206  // resize the buffer accordingly.
7207  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7208  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7209  }
7210 
7211  {
7212  // packAndPrepare* methods modify numExportPacketsPerLID_.
7213  destGraph->numExportPacketsPerLID_.modify_host();
7214  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7215  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7216 
7217  // Pack & Prepare w/ owning PIDs
7218  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7219  numExportPacketsPerLID, ExportLIDs,
7220  SourcePids, constantNumPackets, Distor);
7221  }
7222 
7223  // Do the exchange of remote data.
7224 #ifdef HAVE_TPETRA_MMM_TIMINGS
7225  MM = Teuchos::null;
7226  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7227 #endif
7228 
      // The four cases below differ only in forward vs. reverse
      // Distributor calls and in variable vs. constant packet counts.
7229  if (communication_needed) {
7230  if (reverseMode) {
7231  if (constantNumPackets == 0) { // variable number of packets per LID
7232  // Make sure that host has the latest version, since we're
7233  // using the version on host. If host has the latest
7234  // version, syncing to host does nothing.
7235  destGraph->numExportPacketsPerLID_.sync_host();
7236  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7237  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7238  destGraph->numImportPacketsPerLID_.sync_host();
7239  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7240  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
      // First exchange the per-LID packet counts ...
7241  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7242  numImportPacketsPerLID);
7243  size_t totalImportPackets = 0;
7244  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7245  totalImportPackets += numImportPacketsPerLID[i];
7246  }
7247 
7248  // Reallocation MUST go before setting the modified flag,
7249  // because it may clear out the flags.
7250  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7251  destGraph->imports_.modify_host();
7252  Teuchos::ArrayView<packet_type> hostImports =
7253  getArrayViewFromDualView(destGraph->imports_);
7254  // This is a legacy host pack/unpack path, so use the host
7255  // version of exports_.
7256  destGraph->exports_.sync_host();
7257  Teuchos::ArrayView<const packet_type> hostExports =
7258  getArrayViewFromDualView(destGraph->exports_);
      // ... then exchange the packed data itself.
7259  Distor.doReversePostsAndWaits(hostExports,
7260  numExportPacketsPerLID,
7261  hostImports,
7262  numImportPacketsPerLID);
7263  }
7264  else { // constant number of packets per LID
7265  destGraph->imports_.modify_host();
7266  Teuchos::ArrayView<packet_type> hostImports =
7267  getArrayViewFromDualView(destGraph->imports_);
7268  // This is a legacy host pack/unpack path, so use the host
7269  // version of exports_.
7270  destGraph->exports_.sync_host();
7271  Teuchos::ArrayView<const packet_type> hostExports =
7272  getArrayViewFromDualView(destGraph->exports_);
7273  Distor.doReversePostsAndWaits(hostExports,
7274  constantNumPackets,
7275  hostImports);
7276  }
7277  }
7278  else { // forward mode (the default)
7279  if (constantNumPackets == 0) { // variable number of packets per LID
7280  // Make sure that host has the latest version, since we're
7281  // using the version on host. If host has the latest
7282  // version, syncing to host does nothing.
7283  destGraph->numExportPacketsPerLID_.sync_host();
7284  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7285  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7286  destGraph->numImportPacketsPerLID_.sync_host();
7287  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7288  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
      // First exchange the per-LID packet counts ...
7289  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7290  numImportPacketsPerLID);
7291  size_t totalImportPackets = 0;
7292  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7293  totalImportPackets += numImportPacketsPerLID[i];
7294  }
7295 
7296  // Reallocation MUST go before setting the modified flag,
7297  // because it may clear out the flags.
7298  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7299  destGraph->imports_.modify_host();
7300  Teuchos::ArrayView<packet_type> hostImports =
7301  getArrayViewFromDualView(destGraph->imports_);
7302  // This is a legacy host pack/unpack path, so use the host
7303  // version of exports_.
7304  destGraph->exports_.sync_host();
7305  Teuchos::ArrayView<const packet_type> hostExports =
7306  getArrayViewFromDualView(destGraph->exports_);
      // ... then exchange the packed data itself.
7307  Distor.doPostsAndWaits(hostExports,
7308  numExportPacketsPerLID,
7309  hostImports,
7310  numImportPacketsPerLID);
7311  }
7312  else { // constant number of packets per LID
7313  destGraph->imports_.modify_host();
7314  Teuchos::ArrayView<packet_type> hostImports =
7315  getArrayViewFromDualView(destGraph->imports_);
7316  // This is a legacy host pack/unpack path, so use the host
7317  // version of exports_.
7318  destGraph->exports_.sync_host();
7319  Teuchos::ArrayView<const packet_type> hostExports =
7320  getArrayViewFromDualView(destGraph->exports_);
7321  Distor.doPostsAndWaits(hostExports,
7322  constantNumPackets,
7323  hostImports);
7324  }
7325  }
7326  }
7327 
7328  /*********************************************************************/
7329  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7330  /*********************************************************************/
7331 
7332 #ifdef HAVE_TPETRA_MMM_TIMINGS
7333  MM = Teuchos::null;
7334  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7335 #endif
7336 
7337  // Backwards compatibility measure. We'll use this again below.
7338  destGraph->numImportPacketsPerLID_.sync_host();
7339  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7340  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7341  destGraph->imports_.sync_host();
7342  Teuchos::ArrayView<const packet_type> hostImports =
7343  getArrayViewFromDualView(destGraph->imports_);
      // mynnz is used below as the length of the column-index arrays.
7344  size_t mynnz =
7345  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7346  numImportPacketsPerLID,
7347  constantNumPackets, Distor, INSERT,
7348  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7349  size_t N = BaseRowMap->getNodeNumElements();
7350 
7351  // Allocations
7352  ArrayRCP<size_t> CSR_rowptr(N+1);
7353  ArrayRCP<GO> CSR_colind_GID;
7354  ArrayRCP<LO> CSR_colind_LID;
7355  CSR_colind_GID.resize(mynnz);
7356 
7357  // If LO and GO are the same, we can reuse memory when
7358  // converting the column indices from global to local indices.
7359  if (typeid(LO) == typeid(GO)) {
7360  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7361  }
7362  else {
7363  CSR_colind_LID.resize(mynnz);
7364  }
7365 
7366  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7367  // unpackAndCombine method on a "CrsArrays" object? This passing
7368  // in a huge list of arrays is icky. Can't we have a bit of an
7369  // abstraction? Implementing a concrete DistObject subclass only
7370  // takes five methods.
7371  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7372  numImportPacketsPerLID, constantNumPackets,
7373  Distor, INSERT, NumSameIDs, PermuteToLIDs,
7374  PermuteFromLIDs, N, mynnz, MyPID,
7375  CSR_rowptr(), CSR_colind_GID(),
7376  SourcePids(), TargetPids);
7377 
7378  /**************************************************************/
7379  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7380  /**************************************************************/
7381 #ifdef HAVE_TPETRA_MMM_TIMINGS
7382  MM = Teuchos::null;
7383  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7384 #endif
7385  // Call an optimized version of makeColMap that avoids the
7386  // Directory lookups (since the Import object knows who owns all
7387  // the GIDs).
7388  Teuchos::Array<int> RemotePids;
7389  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7390  CSR_colind_LID(),
7391  CSR_colind_GID(),
7392  BaseDomainMap,
7393  TargetPids, RemotePids,
7394  MyColMap);
7395 
7396  /*******************************************************/
7397  /**** 4b) Second communicator restriction phase ****/
7398  /*******************************************************/
7399  if (restrictComm) {
7400  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7401  ReducedRowMap :
7402  MyColMap->replaceCommWithSubset(ReducedComm);
7403  MyColMap = ReducedColMap; // Reset the "my" maps
7404  }
7405 
7406  // Replace the col map
7407  destGraph->replaceColMap(MyColMap);
7408 
7409  // Short circuit if the processor is no longer in the communicator
7410  //
7411  // NOTE: Epetra modifies all "removed" processes so they
7412  // have a dummy (serial) Map that doesn't touch the original
7413  // communicator. Duplicating that here might be a good idea.
7414  if (ReducedComm.is_null()) {
7415  return;
7416  }
7417 
7418  /***************************************************/
7419  /**** 5) Sort ****/
7420  /***************************************************/
      // Plain sort when an Import is applied forward or an Export in
      // reverse; sort-and-merge in the two opposite cases, after which
      // the index array is shrunk if CSR_rowptr[N] no longer equals
      // mynnz.
7421  if ((! reverseMode && xferAsImport != nullptr) ||
7422  (reverseMode && xferAsExport != nullptr)) {
7423  Import_Util::sortCrsEntries(CSR_rowptr(),
7424  CSR_colind_LID());
7425  }
7426  else if ((! reverseMode && xferAsExport != nullptr) ||
7427  (reverseMode && xferAsImport != nullptr)) {
7428  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7429  CSR_colind_LID());
7430  if (CSR_rowptr[N] != mynnz) {
7431  CSR_colind_LID.resize(CSR_rowptr[N]);
7432  }
7433  }
7434  else {
7435  TEUCHOS_TEST_FOR_EXCEPTION(
7436  true, std::logic_error,
7437  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7438  }
7439  /***************************************************/
7440  /**** 6) Reset the colmap and the arrays ****/
7441  /***************************************************/
7442 
7443  // Give the destination graph its row pointers and local column
7444  // indices (restricted as needed).
7445  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7446 
7447  /***************************************************/
7448  /**** 7) Build Importer & Call ESFC ****/
7449  /***************************************************/
7450  // Pre-build the importer using the existing PIDs
7451  Teuchos::ParameterList esfc_params;
7452 #ifdef HAVE_TPETRA_MMM_TIMINGS
7453  MM = Teuchos::null;
7454  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7455 #endif
7456  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7457 #ifdef HAVE_TPETRA_MMM_TIMINGS
7458  MM = Teuchos::null;
7459  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7460 
7461  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7462 #endif
      // Forward the caller's "compute global constants" choice
      // (defaulting to true) to expertStaticFillComplete.
7463  if(!params.is_null())
7464  esfc_params.set("compute global constants",params->get("compute global constants",true));
7465 
      // esfc_params is a local, so it is passed as an RCP created with
      // rcp(ptr, false).
7466  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7467  MyImport, Teuchos::null, rcp(&esfc_params,false));
7468 
7469  }
7470 
7471  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7472  void
7475  const import_type& importer,
7476  const Teuchos::RCP<const map_type>& domainMap,
7477  const Teuchos::RCP<const map_type>& rangeMap,
7478  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7479  {
7480  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7481  }
7482 
7483  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7484  void
7487  const import_type& rowImporter,
7488  const import_type& domainImporter,
7489  const Teuchos::RCP<const map_type>& domainMap,
7490  const Teuchos::RCP<const map_type>& rangeMap,
7491  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7492  {
7493  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7494  }
7495 
7496  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7497  void
7500  const export_type& exporter,
7501  const Teuchos::RCP<const map_type>& domainMap,
7502  const Teuchos::RCP<const map_type>& rangeMap,
7503  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7504  {
7505  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7506  }
7507 
7508  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7509  void
7512  const export_type& rowExporter,
7513  const export_type& domainExporter,
7514  const Teuchos::RCP<const map_type>& domainMap,
7515  const Teuchos::RCP<const map_type>& rangeMap,
7516  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7517  {
7518  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7519  }
7520 
7521 
7522  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7523  void
7526  {
7527  std::swap(graph.rowMap_, this->rowMap_);
7528  std::swap(graph.colMap_, this->colMap_);
7529  std::swap(graph.rangeMap_, this->rangeMap_);
7530  std::swap(graph.domainMap_, this->domainMap_);
7531 
7532  std::swap(graph.importer_, this->importer_);
7533  std::swap(graph.exporter_, this->exporter_);
7534 
7535  std::swap(graph.lclGraph_, this->lclGraph_);
7536 
7537  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7538 
7539  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7540  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7541 
7542  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7543 
7544  std::swap(graph.k_rowPtrs_, this->k_rowPtrs_);
7545 
7546  std::swap(graph.k_lclInds1D_, this->k_lclInds1D_);
7547  std::swap(graph.k_gblInds1D_, this->k_gblInds1D_);
7548 
7549  std::swap(graph.storageStatus_, this->storageStatus_);
7550 
7551  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7552  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7553  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7554  std::swap(graph.fillComplete_, this->fillComplete_);
7555  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7556  std::swap(graph.noRedundancies_, this->noRedundancies_);
7557  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7558  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7559 
7560  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7561 
7562  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7563  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7564  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7565  }
7566 
7567 
7568  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7569  bool
7572  {
7573  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7574  bool output = true;
7575  output = m1.size() == m2.size() ? output : false;
7576  for(auto & it_m: m1)
7577  {
7578  size_t key = it_m.first;
7579  output = m2.find(key) != m2.end() ? output : false;
7580  if(output)
7581  {
7582  auto v1 = m1.find(key)->second;
7583  auto v2 = m2.find(key)->second;
7584  std::sort(v1.begin(), v1.end());
7585  std::sort(v2.begin(), v2.end());
7586 
7587  output = v1.size() == v2.size() ? output : false;
7588  for(size_t i=0; output && i<v1.size(); i++)
7589  {
7590  output = v1[i]==v2[i] ? output : false;
7591  }
7592  }
7593  }
7594  return output;
7595  };
7596 
7597  bool output = true;
7598 
7599  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7600  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7601  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7602  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7603 
7604  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7605 
7606  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7607  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7608 
7609  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7610 
7611  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7612 
7613  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7614  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7615  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7616  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7617  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7618  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7619  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7620  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7621  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7622 
7623  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7624  // nonlocals_ isa std::map<GO, std::vector<GO> >
7625  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7626 
7627  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7628  // - since this is a HostMirror type, it should be in host memory already
7629  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7630  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7631  {
7632  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7633  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7634  }
7635 
7636  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7637  // - since this is a HostMirror type, it should be in host memory already
7638  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7639  if(output && this->k_numRowEntries_.extent(0) > 0)
7640  {
7641  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7642  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7643  }
7644 
7645  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7646  output = this->k_rowPtrs_.extent(0) == graph.k_rowPtrs_.extent(0) ? output : false;
7647  if(output && this->k_rowPtrs_.extent(0) > 0)
7648  {
7649  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_this = Kokkos::create_mirror_view(this->k_rowPtrs_);
7650  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrs_);
7651  Kokkos::deep_copy(k_rowPtrs_host_this, this->k_rowPtrs_);
7652  Kokkos::deep_copy(k_rowPtrs_host_graph, graph.k_rowPtrs_);
7653  for(size_t i=0; output && i<k_rowPtrs_host_this.extent(0); i++)
7654  output = k_rowPtrs_host_this(i) == k_rowPtrs_host_graph(i) ? output : false;
7655  }
7656 
7657  // Compare k_lclInds1D_ isa Kokkos::View<LocalOrdinal*, ...>
7658  output = this->k_lclInds1D_.extent(0) == graph.k_lclInds1D_.extent(0) ? output : false;
7659  if(output && this->k_lclInds1D_.extent(0) > 0)
7660  {
7661  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_this = Kokkos::create_mirror_view(this->k_lclInds1D_);
7662  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_graph= Kokkos::create_mirror_view(graph.k_lclInds1D_);
7663  Kokkos::deep_copy(k_lclInds1D_host_this, this->k_lclInds1D_);
7664  Kokkos::deep_copy(k_lclInds1D_host_graph, graph.k_lclInds1D_);
7665  for(size_t i=0; output && i < k_lclInds1D_host_this.extent(0); i++)
7666  output = k_lclInds1D_host_this(i) == k_lclInds1D_host_graph(i) ? output : false;
7667  }
7668 
7669  // Compare k_gblInds1D_ isa Kokkos::View<GlobalOrdinal*, ...>
7670  output = this->k_gblInds1D_.extent(0) == graph.k_gblInds1D_.extent(0) ? output : false;
7671  if(output && this->k_gblInds1D_.extent(0) > 0)
7672  {
7673  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_this = Kokkos::create_mirror_view(this->k_gblInds1D_);
7674  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_graph = Kokkos::create_mirror_view(graph.k_gblInds1D_);
7675  Kokkos::deep_copy(k_gblInds1D_host_this, this->k_gblInds1D_);
7676  Kokkos::deep_copy(k_gblInds1D_host_graph, graph.k_gblInds1D_);
7677  for(size_t i=0; output && i<k_gblInds1D_host_this.extent(0); i++)
7678  output = k_gblInds1D_host_this(i) == k_gblInds1D_host_graph(i) ? output : false;
7679  }
7680 
7681  // Check lclGraph_ // isa Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7682  // Kokkos::StaticCrsGraph has 3 data members in it:
7683  // Kokkos::View<size_type*, ...> row_map (local_graph_type::row_map_type)
7684  // Kokkos::View<data_type*, ...> entries (local_graph_type::entries_type)
7685  // Kokkos::View<size_type*, ...> row_block_offsets (local_graph_type::row_block_type)
7686  // There is currently no Kokkos::StaticCrsGraph comparison function that's built-in, so we will just compare
7687  // the three data items here. This can be replaced if Kokkos ever puts in its own comparison routine.
7688  output = this->lclGraph_.row_map.extent(0) == graph.lclGraph_.row_map.extent(0) ? output : false;
7689  if(output && this->lclGraph_.row_map.extent(0) > 0)
7690  {
7691  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_map);
7692  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_map);
7693  Kokkos::deep_copy(lclGraph_rowmap_host_this, this->lclGraph_.row_map);
7694  Kokkos::deep_copy(lclGraph_rowmap_host_graph, graph.lclGraph_.row_map);
7695  for(size_t i=0; output && i<lclGraph_rowmap_host_this.extent(0); i++)
7696  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i) ? output : false;
7697  }
7698 
7699  output = this->lclGraph_.entries.extent(0) == graph.lclGraph_.entries.extent(0) ? output : false;
7700  if(output && this->lclGraph_.entries.extent(0) > 0)
7701  {
7702  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_this = Kokkos::create_mirror_view(this->lclGraph_.entries);
7703  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.entries);
7704  Kokkos::deep_copy(lclGraph_entries_host_this, this->lclGraph_.entries);
7705  Kokkos::deep_copy(lclGraph_entries_host_graph, graph.lclGraph_.entries);
7706  for(size_t i=0; output && i<lclGraph_entries_host_this.extent(0); i++)
7707  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i) ? output : false;
7708  }
7709 
7710  output = this->lclGraph_.row_block_offsets.extent(0) == graph.lclGraph_.row_block_offsets.extent(0) ? output : false;
7711  if(output && this->lclGraph_.row_block_offsets.extent(0) > 0)
7712  {
7713  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_block_offsets);
7714  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_block_offsets);
7715  Kokkos::deep_copy(lclGraph_rbo_host_this, this->lclGraph_.row_block_offsets);
7716  Kokkos::deep_copy(lclGraph_rbo_host_graph, graph.lclGraph_.row_block_offsets);
7717  for(size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7718  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i) ? output : false;
7719  }
7720 
7721  // For the Importer and Exporter, we shouldn't need to explicitly check them since
7722  // they will be consistent with the maps.
7723  // Note: importer_ isa Teuchos::RCP<const import_type>
7724  // exporter_ isa Teuchos::RCP<const export_type>
7725 
7726  return output;
7727  }
7728 
7729 
7730 
7731 } // namespace Tpetra
7732 
7733 //
7734 // Explicit instantiation macros
7735 //
7736 // Must be expanded from within the Tpetra namespace!
7737 //
7738 
// Expands to a `template<>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>, single-Import overload.  Parameters of the
// declared function: sourceGraph, importer, domainMap, rangeMap, params.
// The expansion already ends with ';'.
7739 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7740  template<> \
7741  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7742  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7743  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7744  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7745  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7746  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7747  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7748  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7749  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7750  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7751  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7752  const Teuchos::RCP<Teuchos::ParameterList>& params);
7753 
// Expands to a `template<>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>, two-Import overload.  Parameters of the
// declared function: sourceGraph, rowImporter, domainImporter, domainMap,
// rangeMap, params.  The expansion already ends with ';'.
7754 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7755  template<> \
7756  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7757  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7758  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7759  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7760  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7761  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7762  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7763  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7764  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7765  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7766  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7767  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7768  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7769  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7770  const Teuchos::RCP<Teuchos::ParameterList>& params);
7771 
7772 
// Expands to a `template<>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>, single-Export overload.  Parameters of the
// declared function: sourceGraph, exporter, domainMap, rangeMap, params.
// The expansion already ends with ';'.
7773 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7774  template<> \
7775  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7776  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7777  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7778  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7779  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7780  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7781  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7782  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7783  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7784  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7785  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7786  const Teuchos::RCP<Teuchos::ParameterList>& params);
7787 
// Expands to a `template<>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>, two-Export overload.  Parameters of the
// declared function: sourceGraph, rowExporter, domainExporter, domainMap,
// rangeMap, params.  The expansion already ends with ';'.
7788 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7789  template<> \
7790  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7791  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7792  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7793  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7794  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7795  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7796  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7797  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7798  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7799  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7800  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7801  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7802  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7803  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7804  const Teuchos::RCP<Teuchos::ParameterList>& params);
7805 
7806 
// Umbrella macro for one (LO, GO, NODE) combination: explicitly
// instantiates the class CrsGraph<LO, GO, NODE>, then expands the two
// import and the two export ..._AND_FILL_COMPLETE_INSTANT[_TWO] macros.
// The last expanded macro supplies the final ';', so none is needed
// after the invocation.
7807 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7808  template class CrsGraph<LO, GO, NODE>; \
7809  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7810  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7811  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7812  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7813 
7814 
7815 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
void setAllIndices(const typename local_graph_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Kokkos::View< global_ordinal_type *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Declaration of a function that prints strings from each process.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph, judged by a member-by-member comparison.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
void getLocalRowView(const local_ordinal_type lclRow, Teuchos::ArrayView< const local_ordinal_type > &lclColInds) const override
Get a const, non-persisting view of the given local row's local column indices, as a Teuchos::ArrayVi...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
static bool debug()
Whether Tpetra is in debug mode.
size_t findLocalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const local_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
int local_ordinal_type
Default value of Scalar template parameter.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
std::string description() const override
Return a one-line human-readable description of this object.
local_ordinal_type getLocalViewRawConst(const local_ordinal_type *&lclInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the local column indices of a locally owned row, using the result of getRowInfo...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class' Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don't currently exist.
void getGlobalRowCopy(global_ordinal_type gblRow, const Teuchos::ArrayView< global_ordinal_type > &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Teuchos::ArrayView< const global_ordinal_type > getGlobalView(const RowInfo &rowinfo) const
Get a const, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = getR...
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
Teuchos::ArrayView< local_ordinal_type > getLocalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = ge...
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
ProfileType getProfileType() const
Returns true if the graph was allocated with static data structures.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
Sets up and executes a communication plan for a Tpetra DistObject.
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const size_t *, execution_space >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
typename device_type::execution_space execution_space
This class' Kokkos execution space.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
Utility functions for packing and unpacking sparse matrix entries.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Abstract base class for objects that can be the source of an Import or Export operation.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
size_t getNodeAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
Teuchos::ArrayView< global_ordinal_type > getGlobalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = g...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
void getGlobalRowView(const global_ordinal_type gblRow, Teuchos::ArrayView< const global_ordinal_type > &gblColInds) const override
Get a const, non-persisting view of the given global row's global column indices, as a Teuchos::Array...
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::ArrayView< const local_ordinal_type > getLocalView(const RowInfo &rowinfo) const
Get a const, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = getRo...
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
A parallel distribution of indices over processes.
local_ordinal_type getGlobalViewRawConst(const global_ordinal_type *&gblInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the global column indices of a locally owned row, using the result of getRowInfoFrom...
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
typename Node::device_type device_type
This class' Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Stand-alone utility functions and macros.
Teuchos::ArrayRCP< const local_ordinal_type > getNodePackedIndices() const
Get a Teuchos::ArrayRCP of the packed column indices.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const override
Pack this object's data for Import or Export.
void getLocalRowCopy(local_ordinal_type lclRow, const Teuchos::ArrayView< local_ordinal_type > &lclColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.