Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSGRAPH_DEF_HPP
43 #define TPETRA_CRSGRAPH_DEF_HPP
44 
52 
56 #include "Tpetra_Details_gathervPrint.hpp"
57 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
58 #include "Tpetra_Details_makeColMap.hpp"
62 #include "Tpetra_Distributor.hpp"
63 #include "Teuchos_SerialDenseMatrix.hpp"
64 #include "Tpetra_Vector.hpp"
65 #include "Tpetra_Import_Util.hpp"
66 #include "Tpetra_Import_Util2.hpp"
67 #include "Tpetra_Details_packCrsGraph.hpp"
68 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
70 #include <algorithm>
71 #include <limits>
72 #include <map>
73 #include <sstream>
74 #include <string>
75 #include <type_traits>
76 #include <utility>
77 #include <vector>
78 
79 namespace Tpetra {
80  namespace Details {
81  namespace Impl {
82 
83  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
84  class ConvertColumnIndicesFromGlobalToLocal {
85  public:
86  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
87  const ::Kokkos::View<const GO*, DT>& gblColInds,
88  const ::Kokkos::View<const OffsetType*, DT>& ptr,
89  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
90  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
91  lclColInds_ (lclColInds),
92  gblColInds_ (gblColInds),
93  ptr_ (ptr),
94  lclColMap_ (lclColMap),
95  numRowEnt_ (numRowEnt)
96  {}
97 
98  KOKKOS_FUNCTION void
99  operator () (const LO& lclRow, OffsetType& curNumBad) const
100  {
101  const OffsetType offset = ptr_(lclRow);
102  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
103  // of entries in a row to LO, as long as the row doesn't have
104  // too many duplicate entries.
105  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
106  for (LO j = 0; j < numEnt; ++j) {
107  const GO gid = gblColInds_(offset + j);
108  const LO lid = lclColMap_.getLocalElement (gid);
109  lclColInds_(offset + j) = lid;
110  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
111  ++curNumBad;
112  }
113  }
114  }
115 
116  static OffsetType
117  run (const ::Kokkos::View<LO*, DT>& lclColInds,
118  const ::Kokkos::View<const GO*, DT>& gblColInds,
119  const ::Kokkos::View<const OffsetType*, DT>& ptr,
120  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
121  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
122  {
123  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
124  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
125 
126  const LO lclNumRows = ptr.extent (0) == 0 ?
127  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
128  OffsetType numBad = 0;
129  // Count of "bad" column indices is a reduction over rows.
130  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
131  functor_type (lclColInds, gblColInds, ptr,
132  lclColMap, numRowEnt),
133  numBad);
134  return numBad;
135  }
136 
137  private:
138  ::Kokkos::View<LO*, DT> lclColInds_;
139  ::Kokkos::View<const GO*, DT> gblColInds_;
140  ::Kokkos::View<const OffsetType*, DT> ptr_;
142  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
143  };
144 
145  } // namespace Impl
146 
161  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
162  OffsetType
163  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
164  const Kokkos::View<const GO*, DT>& gblColInds,
165  const Kokkos::View<const OffsetType*, DT>& ptr,
166  const LocalMap<LO, GO, DT>& lclColMap,
167  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
168  {
169  using Impl::ConvertColumnIndicesFromGlobalToLocal;
170  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
171  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
172  }
173 
174  template<class ViewType, class LO>
175  class MaxDifference {
176  public:
177  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
178 
179  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
180  dst = 0;
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  join (volatile LO& dst, const volatile LO& src) const
185  {
186  dst = (src > dst) ? src : dst;
187  }
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator () (const LO lclRow, LO& maxNumEnt) const
191  {
192  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
193  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
194  }
195  private:
196  typename ViewType::const_type ptr_;
197  };
198 
199  template<class ViewType, class LO>
200  typename ViewType::non_const_value_type
201  maxDifference (const char kernelLabel[],
202  const ViewType& ptr,
203  const LO lclNumRows)
204  {
205  if (lclNumRows == 0) {
206  // mfh 07 May 2018: Weirdly, I need this special case,
207  // otherwise I get the wrong answer.
208  return static_cast<LO> (0);
209  }
210  else {
211  using execution_space = typename ViewType::execution_space;
212  using range_type = Kokkos::RangePolicy<execution_space, LO>;
213  LO theMaxNumEnt {0};
214  Kokkos::parallel_reduce (kernelLabel,
215  range_type (0, lclNumRows),
216  MaxDifference<ViewType, LO> (ptr),
217  theMaxNumEnt);
218  return theMaxNumEnt;
219  }
220  }
221 
222  } // namespace Details
223 
224  template <class LocalOrdinal, class GlobalOrdinal, class Node>
225  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
226  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
227  const size_t maxNumEntriesPerRow,
228  const ProfileType pftype,
229  const Teuchos::RCP<Teuchos::ParameterList>& params) :
230  dist_object_type (rowMap)
231  , rowMap_ (rowMap)
232  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
233  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
234  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
235  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
236  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
237  , pftype_ (pftype)
238  , numAllocForAllRows_ (maxNumEntriesPerRow)
239  , storageStatus_ (pftype == StaticProfile ?
240  ::Tpetra::Details::STORAGE_1D_UNPACKED :
241  ::Tpetra::Details::STORAGE_2D)
242  , indicesAreAllocated_ (false)
243  , indicesAreLocal_ (false)
244  , indicesAreGlobal_ (false)
245  , fillComplete_ (false)
246  , lowerTriangular_ (false)
247  , upperTriangular_ (false)
248  , indicesAreSorted_ (true)
249  , noRedundancies_ (true)
250  , haveLocalConstants_ (false)
251  , haveGlobalConstants_ (false)
252  , sortGhostsAssociatedWithEachProcessor_ (true)
253  {
254  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
255  "pftype,params): ";
256  staticAssertions ();
257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
258  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
259  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
260  "a valid size_t value, which in this case means it must not be "
261  "Teuchos::OrdinalTraits<size_t>::invalid().");
262  resumeFill (params);
264  }
265 
266  template <class LocalOrdinal, class GlobalOrdinal, class Node>
268  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
269  const Teuchos::RCP<const map_type>& colMap,
270  const size_t maxNumEntriesPerRow,
271  const ProfileType pftype,
272  const Teuchos::RCP<Teuchos::ParameterList>& params) :
273  dist_object_type (rowMap)
274  , rowMap_ (rowMap)
275  , colMap_ (colMap)
276  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
277  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
278  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
279  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
280  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
281  , pftype_ (pftype)
282  , numAllocForAllRows_ (maxNumEntriesPerRow)
283  , storageStatus_ (pftype == StaticProfile ?
284  ::Tpetra::Details::STORAGE_1D_UNPACKED :
285  ::Tpetra::Details::STORAGE_2D)
286  , indicesAreAllocated_ (false)
287  , indicesAreLocal_ (false)
288  , indicesAreGlobal_ (false)
289  , fillComplete_ (false)
290  , lowerTriangular_ (false)
291  , upperTriangular_ (false)
292  , indicesAreSorted_ (true)
293  , noRedundancies_ (true)
294  , haveLocalConstants_ (false)
295  , haveGlobalConstants_ (false)
296  , sortGhostsAssociatedWithEachProcessor_ (true)
297  {
298  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
299  "pftype,params): ";
300  staticAssertions ();
301  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
302  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
303  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
304  "a valid size_t value, which in this case means it must not be "
305  "Teuchos::OrdinalTraits<size_t>::invalid().");
306  resumeFill (params);
308  }
309 
310  template <class LocalOrdinal, class GlobalOrdinal, class Node>
312  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
313  const Teuchos::ArrayView<const size_t>& numEntPerRow,
314  const ProfileType pftype,
315  const Teuchos::RCP<Teuchos::ParameterList>& params) :
316  dist_object_type (rowMap)
317  , rowMap_ (rowMap)
318  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
319  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
320  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
321  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
322  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
323  , pftype_ (pftype)
324  , numAllocForAllRows_ (0)
325  , storageStatus_ (pftype == StaticProfile ?
326  ::Tpetra::Details::STORAGE_1D_UNPACKED :
327  ::Tpetra::Details::STORAGE_2D)
328  , indicesAreAllocated_ (false)
329  , indicesAreLocal_ (false)
330  , indicesAreGlobal_ (false)
331  , fillComplete_ (false)
332  , lowerTriangular_ (false)
333  , upperTriangular_ (false)
334  , indicesAreSorted_ (true)
335  , noRedundancies_ (true)
336  , haveLocalConstants_ (false)
337  , haveGlobalConstants_ (false)
338  , sortGhostsAssociatedWithEachProcessor_ (true)
339  {
340  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
341  staticAssertions ();
342 
343  const size_t lclNumRows = rowMap.is_null () ?
344  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
345  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
346  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
347  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
348  << " != the local number of rows " << lclNumRows << " as specified by "
349  "the input row Map.");
350 
351  const bool debug = ::Tpetra::Details::Behavior::debug ();
352  if (debug) {
353  for (size_t r = 0; r < lclNumRows; ++r) {
354  const size_t curRowCount = numEntPerRow[r];
355  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
356  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
357  std::invalid_argument, "numEntPerRow(" << r << ") "
358  "specifies an invalid number of entries "
359  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
360  }
361  }
362 
363  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
364  // The latter is a const View, so we have to copy into a nonconst
365  // View first, then assign.
366  typedef decltype (k_numAllocPerRow_) out_view_type;
367  typedef typename out_view_type::non_const_type nc_view_type;
368  typedef Kokkos::View<const size_t*,
369  typename nc_view_type::array_layout,
370  Kokkos::HostSpace,
371  Kokkos::MemoryUnmanaged> in_view_type;
372  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
373  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
374  lclNumRows);
375  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
376  k_numAllocPerRow_ = numAllocPerRowOut;
377 
378  resumeFill (params);
380  }
381 
382 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
383  template <class LocalOrdinal, class GlobalOrdinal, class Node>
385  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
386  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
387  const ProfileType pftype,
388  const Teuchos::RCP<Teuchos::ParameterList>& params) :
389  dist_object_type (rowMap)
390  , rowMap_ (rowMap)
391  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
392  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
393  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
394  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
395  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
396  , pftype_ (pftype)
397  , numAllocForAllRows_ (0)
398  , storageStatus_ (pftype == StaticProfile ?
399  ::Tpetra::Details::STORAGE_1D_UNPACKED :
400  ::Tpetra::Details::STORAGE_2D)
401  , indicesAreAllocated_ (false)
402  , indicesAreLocal_ (false)
403  , indicesAreGlobal_ (false)
404  , fillComplete_ (false)
405  , lowerTriangular_ (false)
406  , upperTriangular_ (false)
407  , indicesAreSorted_ (true)
408  , noRedundancies_ (true)
409  , haveLocalConstants_ (false)
410  , haveGlobalConstants_ (false)
411  , sortGhostsAssociatedWithEachProcessor_ (true)
412  {
413  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,"
414  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
415  staticAssertions ();
416 
417  const size_t lclNumRows = rowMap.is_null () ?
418  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
419  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
420  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
421  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
422  << " != the local number of rows " << lclNumRows << " as specified by "
423  "the input row Map.");
424 
425  const bool debug = ::Tpetra::Details::Behavior::debug ();
426  if (debug) {
427  for (size_t r = 0; r < lclNumRows; ++r) {
428  const size_t curRowCount = numEntPerRow[r];
429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
430  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
431  std::invalid_argument, "numEntPerRow(" << r << ") "
432  "specifies an invalid number of entries "
433  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
434  }
435  }
436 
437  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
438  // The latter is a const View, so we have to copy into a nonconst
439  // View first, then assign.
440  typedef decltype (k_numAllocPerRow_) out_view_type;
441  typedef typename out_view_type::non_const_type nc_view_type;
442  typedef Kokkos::View<const size_t*,
443  typename nc_view_type::array_layout,
444  Kokkos::HostSpace,
445  Kokkos::MemoryUnmanaged> in_view_type;
446  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
447  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
448  lclNumRows);
449  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
450  k_numAllocPerRow_ = numAllocPerRowOut;
451 
452  resumeFill (params);
454  }
455 #endif // TPETRA_ENABLE_DEPRECATED_CODE
456 
457 
458  template <class LocalOrdinal, class GlobalOrdinal, class Node>
459  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
460  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
461  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
462  const ProfileType pftype,
463  const Teuchos::RCP<Teuchos::ParameterList>& params) :
464  dist_object_type (rowMap)
465  , rowMap_ (rowMap)
466  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
467  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
468  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
469  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
470  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
471  , pftype_ (pftype)
472  , k_numAllocPerRow_ (numEntPerRow.h_view)
473  , numAllocForAllRows_ (0)
474  , storageStatus_ (pftype == StaticProfile ?
475  ::Tpetra::Details::STORAGE_1D_UNPACKED :
476  ::Tpetra::Details::STORAGE_2D)
477  , indicesAreAllocated_ (false)
478  , indicesAreLocal_ (false)
479  , indicesAreGlobal_ (false)
480  , fillComplete_ (false)
481  , lowerTriangular_ (false)
482  , upperTriangular_ (false)
483  , indicesAreSorted_ (true)
484  , noRedundancies_ (true)
485  , haveLocalConstants_ (false)
486  , haveGlobalConstants_ (false)
488  {
489  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
490  staticAssertions ();
491 
492  const size_t lclNumRows = rowMap.is_null () ?
493  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
494  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
495  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
496  std::invalid_argument, "numEntPerRow has length " <<
497  numEntPerRow.extent (0) << " != the local number of rows " <<
498  lclNumRows << " as specified by " "the input row Map.");
499 
500  const bool debug = ::Tpetra::Details::Behavior::debug ();
501  if (debug) {
502  for (size_t r = 0; r < lclNumRows; ++r) {
503  const size_t curRowCount = numEntPerRow.h_view(r);
504  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
505  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
506  std::invalid_argument, "numEntPerRow(" << r << ") "
507  "specifies an invalid number of entries "
508  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
509  }
510  }
511 
512  resumeFill (params);
514  }
515 
516 
517  template <class LocalOrdinal, class GlobalOrdinal, class Node>
519  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
520  const Teuchos::RCP<const map_type>& colMap,
521  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
522  const ProfileType pftype,
523  const Teuchos::RCP<Teuchos::ParameterList>& params) :
524  dist_object_type (rowMap)
525  , rowMap_ (rowMap)
526  , colMap_ (colMap)
527  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
528  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
529  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
530  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
531  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
532  , pftype_ (pftype)
533  , k_numAllocPerRow_ (numEntPerRow.h_view)
534  , numAllocForAllRows_ (0)
535  , storageStatus_ (pftype == StaticProfile ?
536  ::Tpetra::Details::STORAGE_1D_UNPACKED :
537  ::Tpetra::Details::STORAGE_2D)
538  , indicesAreAllocated_ (false)
539  , indicesAreLocal_ (false)
540  , indicesAreGlobal_ (false)
541  , fillComplete_ (false)
542  , lowerTriangular_ (false)
543  , upperTriangular_ (false)
544  , indicesAreSorted_ (true)
545  , noRedundancies_ (true)
546  , haveLocalConstants_ (false)
547  , haveGlobalConstants_ (false)
548  , sortGhostsAssociatedWithEachProcessor_ (true)
549  {
550  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
551  staticAssertions ();
552 
553  const size_t lclNumRows = rowMap.is_null () ?
554  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
555  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
556  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
557  std::invalid_argument, "numEntPerRow has length " <<
558  numEntPerRow.extent (0) << " != the local number of rows " <<
559  lclNumRows << " as specified by " "the input row Map.");
560 
561  const bool debug = ::Tpetra::Details::Behavior::debug ();
562  if (debug) {
563  for (size_t r = 0; r < lclNumRows; ++r) {
564  const size_t curRowCount = numEntPerRow.h_view(r);
565  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
566  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
567  std::invalid_argument, "numEntPerRow(" << r << ") "
568  "specifies an invalid number of entries "
569  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
570  }
571  }
572 
573  resumeFill (params);
575  }
576 
577 
578  template <class LocalOrdinal, class GlobalOrdinal, class Node>
580  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
581  const Teuchos::RCP<const map_type>& colMap,
582  const Teuchos::ArrayView<const size_t>& numEntPerRow,
583  const ProfileType pftype,
584  const Teuchos::RCP<Teuchos::ParameterList>& params) :
585  dist_object_type (rowMap)
586  , rowMap_ (rowMap)
587  , colMap_ (colMap)
588  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
589  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
590  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
591  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
592  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
593  , pftype_ (pftype)
594  , numAllocForAllRows_ (0)
595  , storageStatus_ (pftype == StaticProfile ?
596  ::Tpetra::Details::STORAGE_1D_UNPACKED :
597  ::Tpetra::Details::STORAGE_2D)
598  , indicesAreAllocated_ (false)
599  , indicesAreLocal_ (false)
600  , indicesAreGlobal_ (false)
601  , fillComplete_ (false)
602  , lowerTriangular_ (false)
603  , upperTriangular_ (false)
604  , indicesAreSorted_ (true)
605  , noRedundancies_ (true)
606  , haveLocalConstants_ (false)
607  , haveGlobalConstants_ (false)
608  , sortGhostsAssociatedWithEachProcessor_ (true)
609  {
610  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
611  "params): ";
612  staticAssertions ();
613 
614  const size_t lclNumRows = rowMap.is_null () ?
615  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
616  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
617  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
618  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
619  << " != the local number of rows " << lclNumRows << " as specified by "
620  "the input row Map.");
621 
622  const bool debug = ::Tpetra::Details::Behavior::debug ();
623  if (debug) {
624  for (size_t r = 0; r < lclNumRows; ++r) {
625  const size_t curRowCount = numEntPerRow[r];
626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
627  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
628  std::invalid_argument, "numEntPerRow(" << r << ") "
629  "specifies an invalid number of entries "
630  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
631  }
632  }
633 
634  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
635  // The latter is a const View, so we have to copy into a nonconst
636  // View first, then assign.
637  typedef decltype (k_numAllocPerRow_) out_view_type;
638  typedef typename out_view_type::non_const_type nc_view_type;
639  typedef Kokkos::View<const size_t*,
640  typename nc_view_type::array_layout,
641  Kokkos::HostSpace,
642  Kokkos::MemoryUnmanaged> in_view_type;
643  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
644  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
645  lclNumRows);
646  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
647  k_numAllocPerRow_ = numAllocPerRowOut;
648 
649  resumeFill (params);
651  }
652 
653 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
654  template <class LocalOrdinal, class GlobalOrdinal, class Node>
656  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
657  const Teuchos::RCP<const map_type>& colMap,
658  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
659  const ProfileType pftype,
660  const Teuchos::RCP<Teuchos::ParameterList>& params) :
661  dist_object_type (rowMap)
662  , rowMap_ (rowMap)
663  , colMap_ (colMap)
664  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
665  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
666  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
667  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
668  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
669  , pftype_ (pftype)
670  , numAllocForAllRows_ (0)
671  , storageStatus_ (pftype == StaticProfile ?
672  ::Tpetra::Details::STORAGE_1D_UNPACKED :
673  ::Tpetra::Details::STORAGE_2D)
674  , indicesAreAllocated_ (false)
675  , indicesAreLocal_ (false)
676  , indicesAreGlobal_ (false)
677  , fillComplete_ (false)
678  , lowerTriangular_ (false)
679  , upperTriangular_ (false)
680  , indicesAreSorted_ (true)
681  , noRedundancies_ (true)
682  , haveLocalConstants_ (false)
683  , haveGlobalConstants_ (false)
684  , sortGhostsAssociatedWithEachProcessor_ (true)
685  {
686  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,RCP<const Map>,"
687  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
688  staticAssertions ();
689 
690  const size_t lclNumRows = rowMap.is_null () ?
691  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
692  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
693  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
694  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
695  << " != the local number of rows " << lclNumRows << " as specified by "
696  "the input row Map.");
697 
698  const bool debug = ::Tpetra::Details::Behavior::debug ();
699  if (debug) {
700  for (size_t r = 0; r < lclNumRows; ++r) {
701  const size_t curRowCount = numEntPerRow[r];
702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
703  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
704  std::invalid_argument, "numEntPerRow(" << r << ") "
705  "specifies an invalid number of entries "
706  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
707  }
708  }
709 
710  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
711  // The latter is a const View, so we have to copy into a nonconst
712  // View first, then assign.
713  typedef decltype (k_numAllocPerRow_) out_view_type;
714  typedef typename out_view_type::non_const_type nc_view_type;
715  typedef Kokkos::View<const size_t*,
716  typename nc_view_type::array_layout,
717  Kokkos::HostSpace,
718  Kokkos::MemoryUnmanaged> in_view_type;
719  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
720  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
721  lclNumRows);
722  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
723  k_numAllocPerRow_ = numAllocPerRowOut;
724 
725  resumeFill (params);
727  }
728 #endif // TPETRA_ENABLE_DEPRECATED_CODE
729 
730  template <class LocalOrdinal, class GlobalOrdinal, class Node>
731  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
732  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
733  const Teuchos::RCP<const map_type>& colMap,
734  const typename local_graph_type::row_map_type& rowPointers,
735  const typename local_graph_type::entries_type::non_const_type& columnIndices,
736  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
737  dist_object_type (rowMap)
738  , rowMap_(rowMap)
739  , colMap_(colMap)
740  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
741  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
742  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
743  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
744  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
745  , pftype_(StaticProfile)
747  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
748  , indicesAreAllocated_(true)
749  , indicesAreLocal_(true)
750  , indicesAreGlobal_(false)
751  , fillComplete_(false)
752  , lowerTriangular_ (false)
753  , upperTriangular_ (false)
754  , indicesAreSorted_(true)
755  , noRedundancies_(true)
756  , haveLocalConstants_ (false)
757  , haveGlobalConstants_ (false)
759  {
760  staticAssertions ();
761  setAllIndices (rowPointers, columnIndices);
763  }
764 
765  template <class LocalOrdinal, class GlobalOrdinal, class Node>
767  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
768  const Teuchos::RCP<const map_type>& colMap,
769  const Teuchos::ArrayRCP<size_t>& rowPointers,
770  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
771  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
772  dist_object_type (rowMap)
773  , rowMap_ (rowMap)
774  , colMap_ (colMap)
775  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
776  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
777  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
778  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
779  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
780  , pftype_ (StaticProfile)
781  , numAllocForAllRows_ (0)
782  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
783  , indicesAreAllocated_ (true)
784  , indicesAreLocal_ (true)
785  , indicesAreGlobal_ (false)
786  , fillComplete_ (false)
787  , lowerTriangular_ (false)
788  , upperTriangular_ (false)
789  , indicesAreSorted_ (true)
790  , noRedundancies_ (true)
791  , haveLocalConstants_ (false)
792  , haveGlobalConstants_ (false)
793  , sortGhostsAssociatedWithEachProcessor_ (true)
794  {
795  staticAssertions ();
796  setAllIndices (rowPointers, columnIndices);
798  }
799 
800  template <class LocalOrdinal, class GlobalOrdinal, class Node>
802  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
803  const Teuchos::RCP<const map_type>& colMap,
804  const local_graph_type& k_local_graph_,
805  const Teuchos::RCP<Teuchos::ParameterList>& params)
806  : CrsGraph (k_local_graph_,
807  rowMap,
808  colMap,
809  Teuchos::null,
810  Teuchos::null,
811  params)
812  {}
813 
// Constructor from an existing local graph plus row, column, domain, and
// range Maps.
//
// The graph is set up as locally indexed, StaticProfile, 1-D packed storage
// with indices marked allocated, sorted, and free of redundancies.  The body
// validates the input, sets the domain and range Maps (a null domainMap or
// rangeMap is replaced by the row Map), calls makeImportExport, makes
// k_lclInds1D_ and k_rowPtrs_ refer to the local graph's entries and
// row_map, optionally computes global constants, and finally marks the
// graph fill complete.
//
// params may be null.  If it is nonnull, the Boolean entries
// "compute global constants" and "compute local triangular constants" are
// read from it; each defaults to true.
//
// Throws std::runtime_error if colMap is null or if the local graph's row
// count differs from rowMap->getNodeNumElements(); throws std::logic_error
// if any 1-D or 2-D index storage is already allocated.
814  template <class LocalOrdinal, class GlobalOrdinal, class Node>
816  CrsGraph (const local_graph_type& k_local_graph_,
817  const Teuchos::RCP<const map_type>& rowMap,
818  const Teuchos::RCP<const map_type>& colMap,
819  const Teuchos::RCP<const map_type>& domainMap,
820  const Teuchos::RCP<const map_type>& rangeMap,
821  const Teuchos::RCP<Teuchos::ParameterList>& params)
822  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
823  , rowMap_ (rowMap)
824  , colMap_ (colMap)
825  , lclGraph_ (k_local_graph_)
826  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
827  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
828  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
829  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
830  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
831  , pftype_ (StaticProfile)
832  , numAllocForAllRows_ (0)
833  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
834  , indicesAreAllocated_ (true)
835  , indicesAreLocal_ (true)
836  , indicesAreGlobal_ (false)
837  , fillComplete_ (false)
838  , lowerTriangular_ (false)
839  , upperTriangular_ (false)
840  , indicesAreSorted_ (true)
841  , noRedundancies_ (true)
842  , haveLocalConstants_ (false)
843  , haveGlobalConstants_ (false)
844  , sortGhostsAssociatedWithEachProcessor_ (true)
845  {
846  staticAssertions();
847  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
848 
849  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
850  colMap.is_null (), std::runtime_error,
851  ": The input column Map must be nonnull.");
// NOTE(review): rowMap is dereferenced below without a visible null check;
// confirm that callers (or the base class constructor) guarantee nonnull.
852  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
853  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
854  std::runtime_error,
855  ": The input row Map and the input local graph need to have the same "
856  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
857  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
858  << " row(s).");
859  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
860  // rowMap_->getNodeNumElements(), but it doesn't have to.
861  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
862  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
863  // ": The input row Map and the input local graph need to have the same "
864  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
865  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
// Sanity checks on this object's own state: no index storage may exist yet.
866  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
867  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
868  ": cannot have 1D data structures allocated.");
869  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
870  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
871  ": cannot have 2D data structures allocated.");
872 
// A null domain or range Map falls back to the row Map.
873  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
874  rangeMap .is_null() ? rowMap_ : rangeMap);
875  Teuchos::Array<int> remotePIDs (0); // unused output argument
876  this->makeImportExport (remotePIDs, false);
877 
// Point the 1-D index and row-offset members at the local graph's arrays.
878  k_lclInds1D_ = lclGraph_.entries;
879  k_rowPtrs_ = lclGraph_.row_map;
880 
// A null params list means "use the defaults", i.e., both flags are true.
881  const bool callComputeGlobalConstants = params.get () == nullptr ||
882  params->get ("compute global constants", true);
883  const bool computeLocalTriangularConstants = params.get () == nullptr ||
884  params->get ("compute local triangular constants", true);
885 
886  if (callComputeGlobalConstants) {
887  this->computeGlobalConstants (computeLocalTriangularConstants);
888  }
889  this->fillComplete_ = true;
890  this->checkInternalState ();
891  }
892 
// Builds and returns a parameter list named "Tpetra::CrsGraph" that holds
// two sublists, "Import" and "Export".  Both are copies of the same list,
// which is first handed to a temporary Distributor's constructor.
//
// NOTE(review): the line(s) carrying this method's qualified name are
// missing from this listing.  Presumably this is getValidParameters() const
// (setParameterList() calls a method of that name with a matching return
// type) -- confirm against the header.
893  template <class LocalOrdinal, class GlobalOrdinal, class Node>
894  Teuchos::RCP<const Teuchos::ParameterList>
897  {
898  using Teuchos::RCP;
899  using Teuchos::ParameterList;
900  using Teuchos::parameterList;
901 
902  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
903 
904  // Make a sublist for the Import.
905  RCP<ParameterList> importSublist = parameterList ("Import");
906 
907  // FIXME (mfh 02 Apr 2012) We should really have the Import and
908  // Export objects fill in these lists. However, we don't want to
909  // create an Import or Export unless we need them. For now, we
910  // know that the Import and Export just pass the list directly to
911  // their Distributor, so we can create a Distributor here
912  // (Distributor's constructor is a lightweight operation) and have
913  // it fill in the list.
914 
915  // Fill in Distributor default parameters by creating a
916  // Distributor and asking it to do the work.
// The Distributor object itself is not used after construction.
917  Distributor distributor (rowMap_->getComm (), importSublist);
918  params->set ("Import", *importSublist, "How the Import performs communication.");
919 
920  // Make a sublist for the Export. For now, it's a clone of the
921  // Import sublist. It's not a shallow copy, though, since we
922  // might like the Import to do communication differently than the
923  // Export.
924  params->set ("Export", *importSublist, "How the Export performs communication.");
925 
926  return params;
927  }
928 
// Validates params against the list returned by getValidParameters(),
// filling in defaults for missing entries, and then stores params via
// setMyParamList().
// NOTE(review): params is dereferenced without a null check; confirm that
// callers never pass a null RCP.
929  template <class LocalOrdinal, class GlobalOrdinal, class Node>
930  void
932  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
933  {
934  Teuchos::RCP<const Teuchos::ParameterList> validParams =
935  getValidParameters ();
936  params->validateParametersAndSetDefaults (*validParams);
937  this->setMyParamList (params);
938  }
939 
// Returns the row Map's global element count.  rowMap_ is dereferenced
// without a null check.
// NOTE(review): the return type and method name lines are missing from
// this listing; presumably this is getGlobalNumRows() const -- confirm.
940  template <class LocalOrdinal, class GlobalOrdinal, class Node>
944  {
945  return rowMap_->getGlobalNumElements ();
946  }
947 
// Returns the domain Map's global element count.
// Throws std::runtime_error if the graph is not fill complete or if the
// domain Map is null.
// NOTE(review): the declarator lines are missing from this listing; the
// name getGlobalNumCols is taken from tfecfFuncName below.
948  template <class LocalOrdinal, class GlobalOrdinal, class Node>
952  {
953  const char tfecfFuncName[] = "getGlobalNumCols: ";
954  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
955  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
956  "The graph does not have a domain Map. You may not call this method in "
957  "that case.");
958  return getDomainMap ()->getGlobalNumElements ();
959  }
960 
// Returns the row Map's element count on the calling process, or zero if
// the row Map is null.
// NOTE(review): the method name line is missing from this listing;
// presumably this is getNodeNumRows() const, which other methods in this
// file call -- confirm.
961  template <class LocalOrdinal, class GlobalOrdinal, class Node>
962  size_t
965  {
966  return this->rowMap_.is_null () ?
967  static_cast<size_t> (0) :
968  this->rowMap_->getNodeNumElements ();
969  }
970 
// Returns the column Map's element count on the calling process.
// Throws std::runtime_error if the graph has no column Map.
// NOTE(review): the method name line is missing from this listing; the
// name getNodeNumCols is taken from tfecfFuncName below.
971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
972  size_t
975  {
976  const char tfecfFuncName[] = "getNodeNumCols: ";
977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
978  ! hasColMap (), std::runtime_error,
979  "The graph does not have a column Map. You may not call this method "
980  "unless the graph has a column Map. This requires either that a custom "
981  "column Map was given to the constructor, or that fillComplete() has "
982  "been called.");
// The null test below repeats the hasColMap() check above, so the zero
// branch is only reachable if that check did not throw.
983  return colMap_.is_null () ? static_cast<size_t> (0) :
984  colMap_->getNodeNumElements ();
985  }
986 
987 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (marked TPETRA_DEPRECATED); it only forwards to
// getGlobalNumDiagsImpl().  Compiled only when
// TPETRA_ENABLE_DEPRECATED_CODE is defined.
988  template <class LocalOrdinal, class GlobalOrdinal, class Node>
989  global_size_t TPETRA_DEPRECATED
991  getGlobalNumDiags () const
992  {
993  return this->getGlobalNumDiagsImpl ();
994  }
995 
// Deprecated accessor (marked TPETRA_DEPRECATED); it only forwards to
// getNodeNumDiagsImpl().
996  template <class LocalOrdinal, class GlobalOrdinal, class Node>
997  size_t TPETRA_DEPRECATED
998  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
999  getNodeNumDiags () const
1000  {
1001  return this->getNodeNumDiagsImpl ();
1002  }
1003 
// Returns the stored globalNumDiags_ value.
// Throws std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): the return type line is missing from this listing.
1004  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1006  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1007  getGlobalNumDiagsImpl () const
1008  {
1009  const char tfecfFuncName[] = "getGlobalNumDiags: ";
1010  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1011  (! this->haveGlobalConstants_, std::logic_error,
1012  "The graph does not have global constants computed, "
1013  "but the user has requested them.");
1014 
1015  return globalNumDiags_;
1016  }
1017 
// Returns the stored nodeNumDiags_ value.  Unlike getGlobalNumDiagsImpl(),
// this performs no check of whether the value has been computed.
1018  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1019  size_t
1020  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1021  getNodeNumDiagsImpl () const
1022  {
1023  return nodeNumDiags_;
1024  }
1025 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1026 
1027 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (marked TPETRA_DEPRECATED): returns the row Map's
// Node instance, or Teuchos::null if the row Map is null.
1028  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1029  TPETRA_DEPRECATED
1030  Teuchos::RCP<Node>
1031  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1032  getNode () const
1033  {
1034  return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode ();
1035  }
1036 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1037 
// Returns the stored row Map (rowMap_); the result may be null.
1038  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1039  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1041  getRowMap () const
1042  {
1043  return rowMap_;
1044  }
1045 
// Returns the stored column Map (colMap_); the result may be null.
1046  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1047  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1049  getColMap () const
1050  {
1051  return colMap_;
1052  }
1053 
// Returns the stored domain Map (domainMap_).
// NOTE(review): the method name line is missing from this listing;
// presumably this is getDomainMap() const -- confirm.
1054  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1055  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1058  {
1059  return domainMap_;
1060  }
1061 
// Returns the stored range Map (rangeMap_).
// NOTE(review): the method name line is missing from this listing;
// presumably this is getRangeMap() const -- confirm.
1062  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1063  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1066  {
1067  return rangeMap_;
1068  }
1069 
// Returns the stored Import object (importer_).
// NOTE(review): the method name line is missing from this listing;
// presumably this is getImporter() const -- confirm.
1070  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1071  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
1074  {
1075  return importer_;
1076  }
1077 
// Returns the stored Export object (exporter_).
// NOTE(review): the method name line is missing from this listing;
// presumably this is getExporter() const -- confirm.
1078  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1079  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
1082  {
1083  return exporter_;
1084  }
1085 
// Returns true if and only if the column Map (colMap_) is nonnull.
1086  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1087  bool
1089  hasColMap () const
1090  {
1091  return ! colMap_.is_null ();
1092  }
1093 
// Reports whether storage is optimized: indices are allocated,
// k_numRowEntries_ has zero extent, and this process has at least one row.
// Throws std::logic_error if that condition holds while the profile type is
// not StaticProfile.
// NOTE(review): the method name line is missing from this listing; the
// name isStorageOptimized is taken from tfecfFuncName below.
1094  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1095  bool
1098  {
1099  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
1100  // getNodeNumRows() is zero?
1101 
1102  const bool isOpt = indicesAreAllocated_ &&
1103  k_numRowEntries_.extent (0) == 0 &&
1104  getNodeNumRows () > 0;
1105 
// NOTE(review): the message below says "matrix" although this is the graph
// class; it is left as is because it is runtime text.
1106  const char tfecfFuncName[] = "isStorageOptimized: ";
1107  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1108  (isOpt && getProfileType () != StaticProfile, std::logic_error,
1109  "The matrix claims to have optimized storage, but getProfileType() "
1110  "returns DynamicProfile. This should never happen. Please report this "
1111  "bug to the Tpetra developers.");
1112 
1113  return isOpt;
1114  }
1115 
// Returns the stored allocation profile (pftype_).
// NOTE(review): the method name line is missing from this listing;
// presumably this is getProfileType() const -- confirm.
1116  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1117  ProfileType
1120  {
1121  return pftype_;
1122  }
1123 
// Returns the stored globalNumEntries_ value.
// Throws std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): the declarator lines are missing from this listing; the
// name getGlobalNumEntries is taken from tfecfFuncName below.
1124  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1128  {
1129  const char tfecfFuncName[] = "getGlobalNumEntries: ";
1130  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1131  (! this->haveGlobalConstants_, std::logic_error,
1132  "The graph does not have global constants computed, "
1133  "but the user has requested them.");
1134 
1135  return globalNumEntries_;
1136  }
1137 
// Returns the number of graph entries on the calling process.
//
// The result is zero if indices are not allocated or there are no local
// rows.  If k_numRowEntries_ has zero extent, the count is read from the
// last row offset of lclGraph_.row_map (or is zero if row_map has fewer
// than lclNumRows + 1 entries).  Otherwise the per-row counts in
// k_numRowEntries_ are summed.
//
// NOTE(review): the method name line is missing from this listing;
// presumably this is getNodeNumEntries() const -- confirm.
1138  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1139  size_t
1142  {
1143  typedef LocalOrdinal LO;
1144 
1145  if (this->indicesAreAllocated_) {
1146  const LO lclNumRows = this->getNodeNumRows ();
1147  if (lclNumRows == 0) {
1148  return static_cast<size_t> (0);
1149  }
1150  else {
1151  // Avoid the "*this capture" issue by creating a local Kokkos::View.
1152  auto numEntPerRow = this->k_numRowEntries_;
1153  const LO numNumEntPerRow = numEntPerRow.extent (0);
1154  if (numNumEntPerRow == 0) {
// No per-row counts: fall back to the local graph's row offsets.
1155  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1156  static_cast<LO> (lclNumRows + 1)) {
1157  return static_cast<size_t> (0);
1158  }
1159  else {
1160  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1161  }
1162  }
1163  else { // k_numRowEntries_ is populated
1164  // k_numRowEntries_ is actually a host View, so we run
1165  // the sum in its native execution space. This also means
1166  // that we can use explicit capture (which could perhaps
1167  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1168  // any CUDA build issues with trying to run a __device__ -
1169  // only function on host.
1170  typedef typename num_row_entries_type::execution_space
1171  host_exec_space;
1172  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1173 
// Sum over at most min(lclNumRows, numNumEntPerRow) rows, so the loop never
// indexes past the end of numEntPerRow.
1174  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1175  lclNumRows :
1176  numNumEntPerRow;
1177  size_t nodeNumEnt = 0;
1178  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1179  range_type (0, upperLoopBound),
1180  [=] (const LO& k, size_t& lclSum) {
1181  lclSum += numEntPerRow(k);
1182  }, nodeNumEnt);
1183  return nodeNumEnt;
1184  }
1185  }
1186  }
1187  else { // nothing allocated on this process, so no entries
1188  return static_cast<size_t> (0);
1189  }
1190  }
1191 
// Returns the stored globalMaxNumRowEntries_ value.
// Throws std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): the declarator lines are missing from this listing; the
// name getGlobalMaxNumRowEntries is taken from tfecfFuncName below.
1192  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1196  {
1197  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1199  (! this->haveGlobalConstants_, std::logic_error,
1200  "The graph does not have global constants computed, "
1201  "but the user has requested them.");
1202 
1203  return globalMaxNumRowEntries_;
1204  }
1205 
// Returns the stored nodeMaxNumRowEntries_ value, with no check of whether
// it has been computed.
// NOTE(review): the method name line is missing from this listing;
// presumably this is getNodeMaxNumRowEntries() const -- confirm.
1206  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1207  size_t
1210  {
1211  return nodeMaxNumRowEntries_;
1212  }
1213 
// Returns the fillComplete_ flag.
// NOTE(review): the method name line is missing from this listing;
// presumably this is isFillComplete() const -- confirm.
1214  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1215  bool
1218  {
1219  return fillComplete_;
1220  }
1221 
// Returns the negation of the fillComplete_ flag.
// NOTE(review): the method name line is missing from this listing;
// presumably this is isFillActive() const -- confirm.
1222  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1223  bool
1226  {
1227  return ! fillComplete_;
1228  }
1229 
1230 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor (marked TPETRA_DEPRECATED); it only forwards to
// isLowerTriangularImpl().  Compiled only when
// TPETRA_ENABLE_DEPRECATED_CODE is defined.
1231  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1232  bool TPETRA_DEPRECATED
1234  isLowerTriangular () const
1235  {
1236  return this->isLowerTriangularImpl ();
1237  }
1238 
// Deprecated accessor (marked TPETRA_DEPRECATED); it only forwards to
// isUpperTriangularImpl().
1239  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1240  bool TPETRA_DEPRECATED
1241  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1242  isUpperTriangular () const
1243  {
1244  return this->isUpperTriangularImpl ();
1245  }
1246 
// Returns the stored lowerTriangular_ flag.
1247  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1248  bool
1249  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1250  isLowerTriangularImpl () const
1251  {
1252  return this->lowerTriangular_;
1253  }
1254 
// Returns the stored upperTriangular_ flag.
1255  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1256  bool
1257  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1258  isUpperTriangularImpl () const
1259  {
1260  return this->upperTriangular_;
1261  }
1262 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1263 
// Returns the indicesAreLocal_ flag.
// NOTE(review): the method name line is missing from this listing;
// presumably this is isLocallyIndexed() const -- confirm.
1264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1265  bool
1268  {
1269  return indicesAreLocal_;
1270  }
1271 
// Returns the indicesAreGlobal_ flag.
// NOTE(review): the method name line is missing from this listing;
// presumably this is isGloballyIndexed() const -- confirm.
1272  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1273  bool
1276  {
1277  return indicesAreGlobal_;
1278  }
1279 
// Returns the total number of index slots allocated on the calling process.
//
// If indices are not allocated, returns
// Tpetra::Details::OrdinalTraits<size_t>::invalid().  If there are no local
// rows, returns zero.  Otherwise the answer depends on storageStatus_:
//   - STORAGE_1D_PACKED: the last row offset of lclGraph_.row_map (zero if
//     row_map has fewer than lclNumRows + 1 entries);
//   - STORAGE_1D_UNPACKED: the last row offset of k_rowPtrs_ (zero if
//     k_rowPtrs_ is empty);
//   - STORAGE_2D: the sum of the sizes of the per-row index arrays;
//   - anything else: zero.
//
// NOTE(review): the method name line is missing from this listing;
// presumably this is getNodeAllocationSize() const -- confirm.
1280  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1281  size_t
1284  {
1285  typedef LocalOrdinal LO;
1286 
1287  if (this->indicesAreAllocated_) {
1288  const LO lclNumRows = this->getNodeNumRows ();
1289  if (lclNumRows == 0) {
1290  return static_cast<size_t> (0);
1291  }
1292  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED) {
1293  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1294  static_cast<LO> (lclNumRows + 1)) {
1295  return static_cast<size_t> (0);
1296  }
1297  else {
1298  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1299  }
1300  }
1301  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) {
// NOTE(review): unlike the packed branch, this only tests for an empty
// k_rowPtrs_, not for at least lclNumRows + 1 entries -- confirm that a
// nonempty k_rowPtrs_ always has that length.
1302  if (this->k_rowPtrs_.extent (0) == 0) {
1303  return static_cast<size_t> (0);
1304  }
1305  else {
1306  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1307  }
1308  }
1309  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_2D) {
1310  size_t numAllocated = 0;
1311  if (this->isLocallyIndexed ()) {
1312  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1313  numAllocated += this->lclInds2D_[lclRow].size ();
1314  }
1315  }
1316  else if (this->isGloballyIndexed ()) {
1317  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1318  numAllocated += this->gblInds2D_[lclRow].size ();
1319  }
1320  }
1321  // Neither locally nor globally indexed, means no indices allocated.
1322  return numAllocated;
1323  }
1324  else {
1325  return static_cast<size_t> (0);
1326  }
1327  }
1328  else {
1329  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1330  }
1331  }
1332 
// Returns the row Map's communicator, or Teuchos::null if the row Map is
// null.
1333  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1334  Teuchos::RCP<const Teuchos::Comm<int> >
1336  getComm () const
1337  {
1338  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1339  }
1340 
// Returns the row Map's index base.  rowMap_ is dereferenced without a
// null check.
// NOTE(review): the method name line is missing from this listing;
// presumably this is getIndexBase() const -- confirm.
1341  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1342  GlobalOrdinal
1345  {
1346  return rowMap_->getIndexBase ();
1347  }
1348 
// Returns the indicesAreAllocated_ flag.
1349  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1350  bool
1352  indicesAreAllocated () const
1353  {
1354  return indicesAreAllocated_;
1355  }
1356 
// Returns the indicesAreSorted_ flag.
1357  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1358  bool
1360  isSorted () const
1361  {
1362  return indicesAreSorted_;
1363  }
1364 
// Returns the noRedundancies_ flag.
1365  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1366  bool
1368  isMerged () const
1369  {
1370  return noRedundancies_;
1371  }
1372 
// Records that the graph's local structure changed: clears the "sorted"
// and "no redundancies" flags and invalidates the cached local constants.
// NOTE(review): the method name line is missing from this listing;
// presumably this is setLocallyModified() -- confirm.
1373  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1374  void
1377  {
1378  // FIXME (mfh 07 May 2013) How do we know that the change
1379  // introduced a redundancy, or even that it invalidated the sorted
1380  // order of indices? CrsGraph has always made this conservative
1381  // guess. It could be a bit costly to check at insertion time,
1382  // though.
1383  indicesAreSorted_ = false;
1384  noRedundancies_ = false;
1385 
1386  // We've modified the graph, so we'll have to recompute local
1387  // constants like the number of diagonal entries on this process.
1388  haveLocalConstants_ = false;
1389  }
1390 
// Allocates storage for the graph's column indices, either local or global
// as selected by lg.
//
// With StaticProfile, row offsets are computed into k_rowPtrs_ (from
// k_numAllocPerRow_ if it is nonempty, otherwise from the constant
// numAllocForAllRows_), a single 1-D index array is allocated, and
// storageStatus_ becomes STORAGE_1D_UNPACKED.  Otherwise one array per row
// is allocated (2-D storage) and storageStatus_ becomes STORAGE_2D.
//
// Afterwards the method sets the local/global indexing flags, allocates a
// zero-filled k_numRowEntries_ if there are rows, releases the allocation
// hints, marks indices as allocated, and calls checkInternalState().
//
// Throws std::logic_error if lg contradicts the graph's current indexing or
// if indices are already allocated; throws std::invalid_argument if the
// allocation hints are inconsistent.  Exceptions from checkInternalState()
// are rethrown with added context.
1391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1392  void
1394  allocateIndices (const ELocalGlobal lg)
1395  {
1396  using Teuchos::arcp;
1397  using Teuchos::Array;
1398  using Teuchos::ArrayRCP;
1399  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1400  typedef typename local_graph_type::row_map_type::non_const_type
1401  non_const_row_map_type;
1402  typedef typename local_graph_type::entries_type::non_const_type
1403  lcl_col_inds_type;
1404  typedef Kokkos::View<GlobalOrdinal*,
1405  typename lcl_col_inds_type::array_layout,
1406  device_type> gbl_col_inds_type;
1407  const char tfecfFuncName[] = "allocateIndices: ";
1408  const char suffix[] = " Please report this bug to the Tpetra developers.";
1409 
1410  // This is a protected function, only callable by us. If it was
1411  // called incorrectly, it is our fault. That's why the tests
1412  // below throw std::logic_error instead of std::invalid_argument.
1413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1414  (this->isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1415  "The graph is locally indexed, but Tpetra code is calling this method "
1416  "with lg=GlobalIndices." << suffix);
1417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1418  (this->isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1419  "The graph is globally indexed, but Tpetra code is calling this method "
1420  "with lg=LocalIndices. " << suffix);
1421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1422  (this->indicesAreAllocated (), std::logic_error, "The graph's indices "
1423  "are already allocated, but Tpetra is calling allocateIndices again."
1424  << suffix);
1425  const size_t numRows = this->getNodeNumRows ();
1426 
1427  if (this->getProfileType () == StaticProfile) {
1428  //
1429  // STATIC ALLOCATION PROFILE
1430  //
// Build the offsets in a local View first; they are committed to
// k_rowPtrs_ only after the checks below have passed.
1431  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1432 
1433  if (this->k_numAllocPerRow_.extent (0) != 0) {
1434  // It's OK to throw std::invalid_argument here, because we
1435  // haven't incurred any side effects yet. Throwing that
1436  // exception (and not, say, std::logic_error) implies that the
1437  // instance can recover.
1438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1439  (this->k_numAllocPerRow_.extent (0) != numRows,
1440  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1441  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1442  << ", but its length != numRows = " << numRows << ".");
1443 
1444  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1445  // we want to compute here) lives on device. That's OK;
1446  // computeOffsetsFromCounts can handle this case.
1448 
1449  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1450  // doesn't attempt to check its input for "invalid" flag
1451  // values. For now, we omit that feature of the sequential
1452  // code disabled below.
1453  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1454  }
1455  else {
1456  // It's OK to throw std::invalid_argument here, because we
1457  // haven't incurred any side effects yet. Throwing that
1458  // exception (and not, say, std::logic_error) implies that the
1459  // instance can recover.
1460  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1461  (this->numAllocForAllRows_ ==
1462  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1463  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1464  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1465  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1466 
1468  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1469  }
1470 
1471  // "Commit" the resulting row offsets.
1472  this->k_rowPtrs_ = k_rowPtrs;
1473 
// The last row offset is the total number of index slots to allocate.
1474  const size_type numInds = ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, numRows);
1475  // const size_type numInds = static_cast<size_type> (this->k_rowPtrs_(numRows));
1476  if (lg == LocalIndices) {
1477  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1478  }
1479  else {
1480  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1481  }
1482  storageStatus_ = ::Tpetra::Details::STORAGE_1D_UNPACKED;
1483  }
1484  else {
1485  //
1486  // DYNAMIC ALLOCATION PROFILE
1487  //
// Per-row hints are used if present; otherwise every row gets
// numAllocForAllRows_ slots.
1488  const bool useNumAllocPerRow =
1489  (this->k_numAllocPerRow_.extent (0) != 0);
1490 
1491  if (lg == LocalIndices) {
1492  this->lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1493  for (size_t i = 0; i < numRows; ++i) {
1494  const size_t howMany = useNumAllocPerRow ?
1495  this->k_numAllocPerRow_(i) :
1496  this->numAllocForAllRows_;
1497  if (howMany > 0) {
1498  this->lclInds2D_[i].resize (howMany);
1499  }
1500  }
1501  }
1502  else { // allocate global indices
1503  this->gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1504  for (size_t i = 0; i < numRows; ++i) {
1505  const size_t howMany = useNumAllocPerRow ?
1506  this->k_numAllocPerRow_(i) :
1507  this->numAllocForAllRows_;
1508  if (howMany > 0) {
1509  this->gblInds2D_[i].resize (howMany);
1510  }
1511  }
1512  }
1513  this->storageStatus_ = ::Tpetra::Details::STORAGE_2D;
1514  }
1515 
1516  this->indicesAreLocal_ = (lg == LocalIndices);
1517  this->indicesAreGlobal_ = (lg == GlobalIndices);
1518 
1519  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1520  using Kokkos::ViewAllocateWithoutInitializing;
1521  typedef decltype (k_numRowEntries_) row_ent_type;
1522  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1523 
1524  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1525  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1526  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1527  }
1528 
1529  // Once indices are allocated, CrsGraph needs to free this information.
1530  this->numAllocForAllRows_ = 0;
1531  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1532  this->indicesAreAllocated_ = true;
1533 
// Re-throw any failure of the consistency check with a message that
// identifies allocateIndices as the place where it was detected.
1534  try {
1535  this->checkInternalState ();
1536  }
1537  catch (std::logic_error& e) {
1538  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1539  (true, std::logic_error, "At end of allocateIndices, "
1540  "checkInternalState threw std::logic_error: "
1541  << e.what ());
1542  }
1543  catch (std::exception& e) {
1544  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1545  (true, std::runtime_error, "At end of allocateIndices, "
1546  "checkInternalState threw std::exception: "
1547  << e.what ());
1548  }
1549  catch (...) {
1550  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1551  (true, std::runtime_error, "At end of allocateIndices, "
1552  "checkInternalState threw an exception "
1553  "not a subclass of std::exception.");
1554  }
1555  }
1556 
// Returns a read-only view of the local column index storage of the row
// described by rowinfo.
//
// With 1-D storage the view covers rowinfo.allocSize slots starting at
// rowinfo.offset1D, i.e., the row's allocated capacity.  With 2-D storage
// it covers the whole per-row array.  The result is empty if the row has
// no allocated space.
1557  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1558  Teuchos::ArrayView<const LocalOrdinal>
1560  getLocalView (const RowInfo& rowinfo) const
1561  {
1562  using Kokkos::subview;
1563  typedef LocalOrdinal LO;
1564  typedef Kokkos::View<const LO*, execution_space,
1565  Kokkos::MemoryUnmanaged> row_view_type;
1566 
1567  if (rowinfo.allocSize == 0) { // nothing in the row to view
1568  return Teuchos::ArrayView<const LO> ();
1569  }
1570  else { // the row has allocated space
1571  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1572  const size_t start = rowinfo.offset1D;
1573  const size_t len = rowinfo.allocSize;
1574  const std::pair<size_t, size_t> rng (start, start + len);
1575  // mfh 23 Nov 2015: Don't just create a subview of
1576  // k_lclInds1D_ directly, because that first creates a
1577  // _managed_ subview, then returns an unmanaged version of
1578  // that. That touches the reference count, which costs
1579  // performance in a measurable way.
1580  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1581  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1582  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1583  }
1584  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1585  return lclInds2D_[rowinfo.localRow] ();
1586  }
1587  else {
1588  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1589  }
1590  }
1591  }
1592 
// Raw-pointer variant of the local row view.
//
// On return, lclInds points at the row's local column index storage and
// capacity holds the number of slots; both are nullptr / 0 if the row has
// no allocated space.  With 1-D storage the capacity is rowInfo.allocSize;
// with 2-D storage it is the size of the per-row array.
//
// Returns 0 on success.  In a debug build (HAVE_TPETRA_DEBUG) it returns -1
// if rowInfo refers to storage outside the allocated arrays; a release
// build performs no such check.
1593  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1594  LocalOrdinal
1596  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1597  LocalOrdinal& capacity,
1598  const RowInfo& rowInfo) const
1599  {
// Set the outputs to "empty" up front, so every early return leaves them
// in a defined state.
1600  lclInds = nullptr;
1601  capacity = 0;
1602 #ifdef HAVE_TPETRA_DEBUG
1603  constexpr bool debug = true;
1604 #else
1605  constexpr bool debug = false;
1606 #endif // HAVE_TPETRA_DEBUG
1607 
1608  if (rowInfo.allocSize != 0) {
1609  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1610  if (debug) {
1611  if (rowInfo.offset1D + rowInfo.allocSize >
1612  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1613  return static_cast<LocalOrdinal> (-1);
1614  }
1615  }
1616  lclInds = &k_lclInds1D_[rowInfo.offset1D];
1617  capacity = rowInfo.allocSize;
1618  }
1619  else { // 2-D storage
1620  if (debug) {
1621  if (rowInfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1622  return static_cast<LocalOrdinal> (-1);
1623  }
1624  }
1625  // Use a const reference so we don't touch the ArrayRCP's ref
1626  // count, since ArrayRCP's ref count is not thread safe.
1627  const auto& curRow = lclInds2D_[rowInfo.localRow];
1628  if (! curRow.empty ()) {
1629  lclInds = curRow.getRawPtr ();
1630  capacity = curRow.size ();
1631  }
1632  }
1633  }
1634  return static_cast<LocalOrdinal> (0);
1635  }
1636 
// Returns a writable view of the local column index storage of the row
// described by rowinfo.  The logic mirrors getLocalView(): 1-D storage
// yields rowinfo.allocSize slots starting at rowinfo.offset1D, 2-D storage
// yields the whole per-row array, and a row without allocated space yields
// an empty view.
// NOTE(review): the method name line is missing from this listing;
// presumably this is getLocalViewNonConst (const RowInfo& rowinfo) --
// confirm.
1637  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1638  Teuchos::ArrayView<LocalOrdinal>
1641  {
1642  using Kokkos::subview;
1643  typedef LocalOrdinal LO;
1644  typedef Kokkos::View<LO*, execution_space,
1645  Kokkos::MemoryUnmanaged> row_view_type;
1646 
1647  if (rowinfo.allocSize == 0) { // nothing in the row to view
1648  return Teuchos::ArrayView<LO> ();
1649  }
1650  else {
1651  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1652  const size_t start = rowinfo.offset1D;
1653  const size_t len = rowinfo.allocSize;
1654  const std::pair<size_t, size_t> rng (start, start + len);
1655  // mfh 23 Nov 2015: Don't just create a subview of
1656  // k_lclInds1D_ directly, because that first creates a
1657  // _managed_ subview, then returns an unmanaged version of
1658  // that. That touches the reference count, which costs
1659  // performance in a measurable way.
1660  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1661  LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1662  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1663  }
1664  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1665  return lclInds2D_[rowinfo.localRow] ();
1666  }
1667  else {
1668  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1669  }
1670  }
1671  }
1672 
1673 
// Returns a read-only, unmanaged Kokkos::View of the local column index
// storage of the row described by rowInfo.
//
// With 1-D storage the view covers rowInfo.allocSize slots starting at
// rowInfo.offset1D.  With 2-D storage it wraps the per-row array's raw
// pointer and size.  The result is a default-constructed (empty) view if
// the row has no allocated space.
1674  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1675  Kokkos::View<const LocalOrdinal*,
1677  Kokkos::MemoryUnmanaged>
1679  getLocalKokkosRowView (const RowInfo& rowInfo) const
1680  {
1681  typedef LocalOrdinal LO;
1682  typedef Kokkos::View<const LO*, execution_space,
1683  Kokkos::MemoryUnmanaged> row_view_type;
1684 
1685  if (rowInfo.allocSize == 0) { // nothing in the row to view
1686  return row_view_type ();
1687  }
1688  else { // the row has allocated space
1689  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1690  const size_t start = rowInfo.offset1D;
1691  const size_t len = rowInfo.allocSize;
1692  const std::pair<size_t, size_t> rng (start, start + len);
1693  // mfh 23 Nov 2015: Don't just create a subview of
1694  // k_lclInds1D_ directly, because that first creates a
1695  // _managed_ subview, then returns an unmanaged version of
1696  // that. That touches the reference count, which costs
1697  // performance in a measurable way.
1698  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1699  }
1700  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1701  // Use a reference, so that I don't touch the
1702  // Teuchos::ArrayView reference count in a debug build. (It
1703  // has no reference count in a release build.) This ensures
1704  // thread safety.
1705  //
1706  // lclInds2D_ lives on host, so this code does not assume UVM.
1707  Teuchos::Array<LO>& lclInds = this->lclInds2D_[rowInfo.localRow];
1708  return row_view_type (lclInds.getRawPtr (), lclInds.size ());
1709  }
1710  else {
1711  return row_view_type (); // nothing in the row to view
1712  }
1713  }
1714  }
1715 
1716 
// Returns a writable, unmanaged Kokkos::View of the local column index
// storage of the row described by rowInfo.
//
// With 1-D storage the view covers rowInfo.allocSize slots starting at
// rowInfo.offset1D.  With 2-D storage it wraps the per-row array's raw
// pointer and size.  The result is a default-constructed (empty) view if
// the row has no allocated space.
1717  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1718  Kokkos::View<LocalOrdinal*,
1719  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1720  Kokkos::MemoryUnmanaged>
1721  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1722  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1723  {
1724  typedef LocalOrdinal LO;
1725  typedef Kokkos::View<LO*, execution_space,
1726  Kokkos::MemoryUnmanaged> row_view_type;
1727 
1728  if (rowInfo.allocSize == 0) { // nothing in the row to view
1729  return row_view_type ();
1730  }
1731  else { // the row has allocated space
1732  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1733  const size_t start = rowInfo.offset1D;
1734  const size_t len = rowInfo.allocSize;
1735  const std::pair<size_t, size_t> rng (start, start + len);
1736  // mfh 23 Nov 2015: Don't just create a subview of
1737  // k_lclInds1D_ directly, because that first creates a
1738  // _managed_ subview, then returns an unmanaged version of
1739  // that. That touches the reference count, which costs
1740  // performance in a measurable way.
1741  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1742  }
1743  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1744  // Use a reference, so that I don't touch the
1745  // Teuchos::ArrayView reference count in a debug build. (It
1746  // has no reference count in a release build.) This ensures
1747  // thread safety.
1748  //
1749  // lclInds2D_ lives on host, so this code does not assume UVM.
1750  Teuchos::Array<LO>& cols = this->lclInds2D_[rowInfo.localRow];
1751  LO* const colsRaw = cols.getRawPtr ();
1752  return row_view_type (colsRaw, cols.size ());
1753  }
1754  else {
1755  return row_view_type (); // nothing in the row to view
1756  }
1757  }
1758  }
1759 
1760 
1761  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1762  Kokkos::View<const GlobalOrdinal*,
1763  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1764  Kokkos::MemoryUnmanaged>
1765  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1766  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1767  {
1768  typedef GlobalOrdinal GO;
1769  typedef Kokkos::View<const GO*, execution_space,
1770  Kokkos::MemoryUnmanaged> row_view_type;
1771 
1772  if (rowinfo.allocSize == 0) {
1773  return row_view_type ();
1774  }
1775  else { // nothing in the row to view
1776  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1777  const size_t start = rowinfo.offset1D;
1778  const size_t len = rowinfo.allocSize;
1779  const std::pair<size_t, size_t> rng (start, start + len);
1780  // mfh 23 Nov 2015: Don't just create a subview of
1781  // k_gblInds1D_ directly, because that first creates a
1782  // _managed_ subview, then returns an unmanaged version of
1783  // that. That touches the reference count, which costs
1784  // performance in a measurable way.
1785  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1786  }
1787  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1788  // Use a reference, so that I don't touch the
1789  // Teuchos::ArrayView reference count in a debug build. (It
1790  // has no reference count in a release build.) This ensures
1791  // thread safety.
1792  //
1793  // gblInds2D_ lives on host, so this code does not assume UVM.
1794  Teuchos::Array<GO>& cols = this->gblInds2D_[rowinfo.localRow];
1795  return row_view_type (cols.getRawPtr (), cols.size ());
1796  }
1797  else {
1798  return row_view_type (); // nothing in the row to view
1799  }
1800  }
1801  }
1802 
1803 
1804  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1805  Teuchos::ArrayView<const GlobalOrdinal>
1807  getGlobalView (const RowInfo& rowinfo) const
1808  {
1809  Teuchos::ArrayView<const GlobalOrdinal> view;
1810  if (rowinfo.allocSize > 0) {
1811  if (k_gblInds1D_.extent (0) != 0) {
1812  auto rng = std::make_pair (rowinfo.offset1D,
1813  rowinfo.offset1D + rowinfo.allocSize);
1814  // mfh 23 Nov 2015: Don't just create a subview of
1815  // k_gblInds1D_ directly, because that first creates a
1816  // _managed_ subview, then returns an unmanaged version of
1817  // that. That touches the reference count, which costs
1818  // performance in a measurable way.
1819  Kokkos::View<const GlobalOrdinal*, execution_space,
1820  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1821  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1822  }
1823  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1824  view = gblInds2D_[rowinfo.localRow] ();
1825  }
1826  }
1827  return view;
1828  }
1829 
1830 
1831  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1832  LocalOrdinal
1834  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1835  LocalOrdinal& capacity,
1836  const RowInfo& rowInfo) const
1837  {
1838  gblInds = nullptr;
1839  capacity = 0;
1840 #ifdef HAVE_TPETRA_DEBUG
1841  constexpr bool debug = true;
1842 #else
1843  constexpr bool debug = false;
1844 #endif // HAVE_TPETRA_DEBUG
1845 
1846  if (rowInfo.allocSize != 0) {
1847  if (k_gblInds1D_.extent (0) != 0) { // 1-D storage
1848  if (debug) {
1849  if (rowInfo.offset1D + rowInfo.allocSize >
1850  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1851  return static_cast<LocalOrdinal> (-1);
1852  }
1853  }
1854  gblInds = &k_gblInds1D_[rowInfo.offset1D];
1855  capacity = rowInfo.allocSize;
1856  }
1857  else {
1858  if (debug) {
1859  if (rowInfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1860  return static_cast<LocalOrdinal> (-1);
1861  }
1862  }
1863  const auto& curRow = gblInds2D_[rowInfo.localRow];
1864  if (! curRow.empty ()) {
1865  gblInds = curRow.getRawPtr ();
1866  capacity = curRow.size ();
1867  }
1868  }
1869  }
1870  return static_cast<LocalOrdinal> (0);
1871  }
1872 
1873 
// Return a nonconst Teuchos::ArrayView of the global column index
// storage of the row described by `rowinfo`.
//
// The view covers the row's whole allocation (rowinfo.allocSize
// slots).  It is empty when the row has no allocation, or when
// neither the 1-D array nor the row's 2-D array holds anything.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / parameter list (between 1875 and 1878) are
// absent here.  The body reads a variable named `rowinfo`; presumably
// this is getGlobalViewNonConst (const RowInfo& rowinfo) -- confirm
// against the real source file.
1874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1875  Teuchos::ArrayView<GlobalOrdinal>
1878  {
1879  Teuchos::ArrayView<GlobalOrdinal> view;
1880  if (rowinfo.allocSize > 0) {
// 1-D storage: the row is the slice
// [offset1D, offset1D + allocSize) of k_gblInds1D_.
1881  if (k_gblInds1D_.extent (0) != 0) {
1882  auto rng = std::make_pair (rowinfo.offset1D,
1883  rowinfo.offset1D + rowinfo.allocSize);
1884  // mfh 23 Nov 2015: Don't just create a subview of
1885  // k_gblInds1D_ directly, because that first creates a
1886  // _managed_ subview, then returns an unmanaged version of
1887  // that. That touches the reference count, which costs
1888  // performance in a measurable way.
1889  Kokkos::View<GlobalOrdinal*, execution_space,
1890  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1891  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1892  }
// 2-D storage: view the row's own array, if it holds anything.
1893  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1894  view = gblInds2D_[rowinfo.localRow] ();
1895  }
1896  }
// Still the default-constructed (empty) view if no branch above ran.
1897  return view;
1898  }
1899 
1900 
1901  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1902  RowInfo
1904  getRowInfo (const LocalOrdinal myRow) const
1905  {
1906  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1907  RowInfo ret;
1908  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1909  ret.localRow = STINV;
1910  ret.allocSize = 0;
1911  ret.numEntries = 0;
1912  ret.offset1D = STINV;
1913  return ret;
1914  }
1915 
1916  ret.localRow = static_cast<size_t> (myRow);
1917  if (this->indicesAreAllocated ()) {
1918  if (this->getProfileType () == StaticProfile) {
1919  // Offsets tell us the allocation size in this case.
1920  if (this->k_rowPtrs_.extent (0) == 0) {
1921  ret.offset1D = 0;
1922  ret.allocSize = 0;
1923  }
1924  else {
1925  ret.offset1D = this->k_rowPtrs_(myRow);
1926  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1927  }
1928 
1929  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1930  ret.allocSize :
1931  this->k_numRowEntries_(myRow);
1932  }
1933  else { // DynamicProfile
1934  ret.offset1D = STINV;
1935  if (this->isLocallyIndexed ()) {
1936  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1937  size_t (0) :
1938  this->lclInds2D_[myRow].size ();
1939  }
1940  else if (this->isGloballyIndexed ()) {
1941  ret.allocSize = (this->gblInds2D_.size () == 0) ?
1942  size_t (0) :
1943  this->gblInds2D_[myRow].size ();
1944  }
1945  else { // neither locally nor globally indexed means no indices alloc'd
1946  ret.allocSize = 0;
1947  }
1948 
1949  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1950  size_t (0) :
1951  this->k_numRowEntries_(myRow);
1952  }
1953  }
1954  else { // haven't performed allocation yet; probably won't hit this code
1955  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1956  // allocate, rather than doing lazy allocation at first insert.
1957  // This will make k_numAllocPerRow_ obsolete.
1958  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1959  this->k_numAllocPerRow_(myRow) : // this is a host View
1960  this->numAllocForAllRows_;
1961  ret.numEntries = 0;
1962  ret.offset1D = STINV;
1963  }
1964 
1965  return ret;
1966  }
1967 
1968 
1969  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1970  RowInfo
1972  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1973  {
1974  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1975  RowInfo ret;
1976  if (this->rowMap_.is_null ()) {
1977  ret.localRow = STINV;
1978  ret.allocSize = 0;
1979  ret.numEntries = 0;
1980  ret.offset1D = STINV;
1981  return ret;
1982  }
1983  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1984  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1985  ret.localRow = STINV;
1986  ret.allocSize = 0;
1987  ret.numEntries = 0;
1988  ret.offset1D = STINV;
1989  return ret;
1990  }
1991 
1992  ret.localRow = static_cast<size_t> (myRow);
1993  if (this->indicesAreAllocated ()) {
1994  // graph data structures have the info that we need
1995  //
1996  // if static graph, offsets tell us the allocation size
1997  if (this->getProfileType() == StaticProfile) {
1998  if (this->k_rowPtrs_.extent (0) == 0) {
1999  ret.offset1D = 0;
2000  ret.allocSize = 0;
2001  }
2002  else {
2003  ret.offset1D = this->k_rowPtrs_(myRow);
2004  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
2005  }
2006 
2007  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2008  ret.allocSize :
2009  this->k_numRowEntries_(myRow);
2010  }
2011  else { // DynamicProfile
2012  ret.offset1D = STINV;
2013  if (this->isLocallyIndexed ()) {
2014  ret.allocSize = (this->lclInds2D_.size () == 0) ?
2015  size_t (0) :
2016  this->lclInds2D_[myRow].size ();
2017  }
2018  else {
2019  ret.allocSize = (this->gblInds2D_.size () == 0) ?
2020  size_t (0) :
2021  this->gblInds2D_[myRow].size ();
2022  }
2023 
2024  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2025  size_t (0) :
2026  this->k_numRowEntries_(myRow);
2027  }
2028  }
2029  else { // haven't performed allocation yet; probably won't hit this code
2030  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
2031  // allocate, rather than doing lazy allocation at first insert.
2032  // This will make k_numAllocPerRow_ obsolete.
2033  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
2034  this->k_numAllocPerRow_(myRow) : // this is a host View
2035  this->numAllocForAllRows_;
2036  ret.numEntries = 0;
2037  ret.offset1D = STINV;
2038  }
2039 
2040  return ret;
2041  }
2042 
2043 
2044  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2045  void
2047  staticAssertions () const
2048  {
2049  using Teuchos::OrdinalTraits;
2050  typedef LocalOrdinal LO;
2051  typedef GlobalOrdinal GO;
2052  typedef global_size_t GST;
2053 
2054  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
2055  // This is so that we can store local indices in the memory
2056  // formerly occupied by global indices.
2057  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
2058  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
2059  // Assumption: max(size_t) >= max(LocalOrdinal)
2060  // This is so that we can represent any LocalOrdinal as a size_t.
2061  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
2062  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
2063  static_assert (sizeof(GST) >= sizeof(size_t),
2064  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
2065 
2066  // FIXME (mfh 30 Sep 2015) We're not using
2067  // Teuchos::CompileTimeAssert any more. Can we do these checks
2068  // with static_assert?
2069 
2070  // can't call max() with CompileTimeAssert, because it isn't a
2071  // constant expression; will need to make this a runtime check
2072  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
2073  "given template arguments: size assumptions are not valid.";
2074  TEUCHOS_TEST_FOR_EXCEPTION(
2075  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
2076  std::runtime_error, msg);
2077  TEUCHOS_TEST_FOR_EXCEPTION(
2078  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
2079  std::runtime_error, msg);
2080  TEUCHOS_TEST_FOR_EXCEPTION(
2081  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
2082  std::runtime_error, msg);
2083  TEUCHOS_TEST_FOR_EXCEPTION(
2084  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
2085  std::runtime_error, msg);
2086  }
2087 
2088 
// Append the column indices in `newInds` to the end of the row
// described by `rowinfo`, and return how many indices were appended.
//
// newInds [in] Holds the input indices: `ginds` is read when
//   lg == GlobalIndices, `linds` when lg == LocalIndices.
// lg      [in] Kind of the input indices.
// I       [in] Kind of the indices to store in the graph.
//
// Supported (lg, I) combinations:
//   (Global, Global): copy the input as is.
//   (Global, Local):  convert each input index with
//                     colMap_->getLocalElement, then store.
//   (Local,  Local):  copy the input as is.
//   (Local,  Global): throws std::logic_error (not implemented).
//
// All input indices are appended as given; this function does not
// look for duplicates and does not grow the row.  Only a debug build
// checks that the row's view is large enough.
//
// Side effects: adds the count to rowinfo.numEntries and to
// k_numRowEntries_(rowinfo.localRow), and calls setLocallyModified().
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / first parameter (between 2090 and 2093) are
// absent here.  The body modifies `rowinfo.numEntries`, so `rowinfo`
// is presumably a nonconst RowInfo reference -- confirm against the
// real source file.
2089  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2090  size_t
2093  const SLocalGlobalViews &newInds,
2094  const ELocalGlobal lg,
2095  const ELocalGlobal I)
2096  {
2097  using Teuchos::ArrayView;
2098  typedef LocalOrdinal LO;
2099  typedef GlobalOrdinal GO;
2100  const char tfecfFuncName[] = "insertIndices: ";
2101 #ifdef HAVE_TPETRA_DEBUG
2102  constexpr bool debug = true;
2103 #else
2104  constexpr bool debug = false;
2105 #endif // HAVE_TPETRA_DEBUG
2106 
// Entry count before the insert; only filled in (and only used) in
// a debug build, for the consistency check at the end.
2107  size_t oldNumEnt = 0;
2108  if (debug) {
2109  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2110  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
2111  "lg must be either GlobalIndices or LocalIndices.");
2112  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
2113  }
2114 
2115  size_t numNewInds = 0;
2116  if (lg == GlobalIndices) { // input indices are global
2117  ArrayView<const GO> new_ginds = newInds.ginds;
2118  numNewInds = new_ginds.size();
2119  if (I == GlobalIndices) { // store global indices
2120  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
2121  if (debug) {
2122  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2123  (static_cast<size_t> (gind_view.size ()) <
2124  rowinfo.numEntries + numNewInds, std::logic_error,
2125  "gind_view.size() = " << gind_view.size ()
2126  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2127  << ") + numNewInds (= " << numNewInds << ").");
2128  }
// Write starting at the first unused slot of the row.
2129  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
2130  for (size_t k = 0; k < numNewInds; ++k) {
2131  gblColInds_out[k] = new_ginds[k];
2132  }
2133  }
2134  else if (I == LocalIndices) { // store local indices
2135  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2136  if (debug) {
2137  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2138  (static_cast<size_t> (lind_view.size ()) <
2139  rowinfo.numEntries + numNewInds, std::logic_error,
2140  "lind_view.size() = " << lind_view.size ()
2141  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2142  << ") + numNewInds (= " << numNewInds << ").");
2143  }
// Write starting at the first unused slot of the row, converting
// each global index to a local one through the column Map.  The
// result of getLocalElement is stored without being checked here.
2144  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2145  for (size_t k = 0; k < numNewInds; ++k) {
2146  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
2147  }
2148  }
2149  }
2150  else if (lg == LocalIndices) { // input indices are local
2151  ArrayView<const LO> new_linds = newInds.linds;
2152  numNewInds = new_linds.size();
2153  if (I == LocalIndices) { // store local indices
2154  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2155  if (debug) {
2156  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2157  (static_cast<size_t> (lind_view.size ()) <
2158  rowinfo.numEntries + numNewInds, std::logic_error,
2159  "lind_view.size() = " << lind_view.size ()
2160  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2161  << ") + numNewInds (= " << numNewInds << ").");
2162  }
// Write starting at the first unused slot of the row.
2163  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2164  for (size_t k = 0; k < numNewInds; ++k) {
2165  lclColInds_out[k] = new_linds[k];
2166  }
2167  }
2168  else if (I == GlobalIndices) {
// Unconditional throw: this combination is rejected in every build.
2169  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2170  (true, std::logic_error, "The case where the input indices are local "
2171  "and the indices to write are global (lg=LocalIndices, I="
2172  "GlobalIndices) is not implemented, because it does not make sense."
2173  << std::endl << "If you have correct local column indices, that "
2174  "means the graph has a column Map. In that case, you should be "
2175  "storing local indices.");
2176  }
2177  }
2178 
// Record the new entries both in the caller's RowInfo and in the
// graph's own per-row count.
2179  rowinfo.numEntries += numNewInds;
2180  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
2181  this->setLocallyModified ();
2182 
// Debug build only: the row's reported entry count must have grown
// by exactly the number of indices appended.
2183  if (debug) {
2184  const size_t chkNewNumEnt =
2185  this->getNumEntriesInLocalRow (rowinfo.localRow);
2186  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2187  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
2188  "chkNewNumEnt = " << chkNewNumEnt
2189  << " != oldNumEnt (= " << oldNumEnt
2190  << ") + numNewInds (= " << numNewInds << ").");
2191  }
2192 
2193  return numNewInds;
2194  }
2195 
2196  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2197  size_t
2199  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
2200  const GlobalOrdinal inputGblColInds[],
2201  const size_t numInputInds)
2202  {
2203  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
2204  inputGblColInds, numInputInds);
2205  }
2206 
// Insert the global column indices inputGblColInds[0 .. numInputInds-1]
// into the row described by `rowInfo`, and return the number of
// indices inserted.
//
// StaticProfile: delegates to Details::insertCrsIndices, passing
//   `fun` through, and returns the count it reports.  Throws
//   std::runtime_error if that count is the invalid size_t value
//   (reported here as "not enough capacity").
// Otherwise (DynamicProfile): appends every input index to the
//   row's gblInds2D_ array, growing the array if needed, and
//   returns numInputInds.  `fun` is not used on this path.
//
// Side effects on both paths: adds the returned count to
// k_numRowEntries_(lclRow) and calls setLocallyModified().
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / first parameter (between 2208 and 2211) are
// absent here.  The body only reads `rowInfo`; presumably the first
// parameter is `const RowInfo& rowInfo` -- confirm against the real
// source file.  The exact meaning of the three size_t arguments of
// `fun` is defined by Details::insertCrsIndices and cannot be told
// from this function.
2207  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2208  size_t
2211  const GlobalOrdinal inputGblColInds[],
2212  const size_t numInputInds,
2213  std::function<void(const size_t, const size_t, const size_t)> fun)
2214  {
2215  using Kokkos::View;
2216  using Kokkos::subview;
2217  using Kokkos::MemoryUnmanaged;
2218  using LO = LocalOrdinal;
2219  using GO = GlobalOrdinal;
2220  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
2221 #ifdef HAVE_TPETRA_DEBUG
2222  constexpr bool debug = true;
2223 #else
2224  constexpr bool debug = false;
2225 #endif // HAVE_TPETRA_DEBUG
2226 
2227  const LO lclRow = static_cast<LO> (rowInfo.localRow);
2228 
2229  if (this->getProfileType () == StaticProfile) {
// Local copy of the row's current entry count, handed to
// insertCrsIndices.
2230  auto numEntries = rowInfo.numEntries;
// Wrap the caller's raw array in an unmanaged View (no copy).
2231  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2232  inp_view_type inputInds(inputGblColInds, numInputInds);
2233  size_t numInserted = Details::insertCrsIndices(lclRow, k_rowPtrs_,
2234  this->k_gblInds1D_, numEntries, inputInds, fun);
2235  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2236  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2237  std::runtime_error,
2238  "There is not enough capacity to insert indices in to row " << lclRow <<
2239  ". The upper bound on the number of entries in this row must be increased to "
2240  "accommodate one or more of the new indices.");
2241  this->k_numRowEntries_(lclRow) += numInserted;
2242  this->setLocallyModified();
2243  return numInserted;
2244  }
2245  else {
2246  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2247  size_t newNumEntries = rowInfo.numEntries + numInputInds; // preliminary
2248  if (newNumEntries > rowInfo.allocSize) {
2249  // update allocation, doubling size to reduce # reallocations
2250  size_t newAllocSize = 2*rowInfo.allocSize;
2251  if (newAllocSize < newNumEntries) {
2252  newAllocSize = newNumEntries;
2253  }
2254  this->gblInds2D_[lclRow].resize (newAllocSize);
2255  } // newNumEntries > rowInfo.allocSize
2256 
2257  // Copy new indices at end of global index array
// Take the raw pointer only after the possible resize above.
2258  GO* const whereToPutGblColInds =
2259  this->gblInds2D_[lclRow].getRawPtr () + rowInfo.numEntries;
2260  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2261  whereToPutGblColInds[k_new] = inputGblColInds[k_new];
2262  }
2263  this->k_numRowEntries_(lclRow) += numInputInds;
2264  this->setLocallyModified ();
2265 
// Debug build only: the row's reported entry count must match the
// count computed above.
2266  if (debug) {
2267  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (lclRow);
2268  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2269  (chkNewNumEntries != newNumEntries, std::logic_error,
2270  "getNumEntriesInLocalRow(lclRow=" << lclRow << ") = "
2271  << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2272  << ". Please report this bug to the Tpetra developers.");
2273  }
2274  return numInputInds;
2275  }
2276  }
2277 
2278 
2279  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2280  void
2282  insertLocalIndicesImpl (const LocalOrdinal myRow,
2283  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2284  std::function<void(const size_t, const size_t, const size_t)> fun)
2285  {
2286  using Kokkos::MemoryUnmanaged;
2287  using Kokkos::subview;
2288  using Kokkos::View;
2289  using LO = LocalOrdinal;
2290  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
2291 
2292  const RowInfo rowInfo = this->getRowInfo(myRow);
2293 
2294  size_t numNewInds = 0;
2295  size_t newNumEntries = 0;
2296 
2297  if (this->getProfileType () == StaticProfile) {
2298  auto numEntries = rowInfo.numEntries;
2299  // Note: Teuchos::ArrayViews are in HostSpace
2300  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
2301  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2302  auto numInserted = Details::insertCrsIndices(myRow, k_rowPtrs_,
2303  this->k_lclInds1D_, numEntries, inputInds, fun);
2304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2305  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2306  std::runtime_error,
2307  "There is not enough capacity to insert indices in to row " << myRow <<
2308  ". The upper bound on the number of entries in this row must be increased to "
2309  "accommodate one or more of the new indices.");
2310  numNewInds = numInserted;
2311  newNumEntries = rowInfo.numEntries + numNewInds;
2312  }
2313  else {
2314  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2315  numNewInds = indices.size();
2316  newNumEntries = rowInfo.numEntries + numNewInds;
2317  if (newNumEntries > rowInfo.allocSize) {
2318  // update allocation, doubling size to reduce number of reallocations
2319  size_t newAllocSize = 2*rowInfo.allocSize;
2320  if (newAllocSize < newNumEntries) {
2321  newAllocSize = newNumEntries;
2322  }
2323  this->lclInds2D_[myRow].resize(newAllocSize);
2324  }
2325  std::copy (indices.begin (), indices.end (),
2326  this->lclInds2D_[myRow].begin () + rowInfo.numEntries);
2327  }
2328 
2329  this->k_numRowEntries_(myRow) += numNewInds;
2330  this->setLocallyModified ();
2331 
2332 #ifdef HAVE_TPETRA_DEBUG
2333  constexpr bool debug = true;
2334 #else
2335  constexpr bool debug = false;
2336 #endif // HAVE_TPETRA_DEBUG
2337 
2338  if (debug) {
2339  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
2340  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2341  (chkNewNumEntries != newNumEntries, std::logic_error,
2342  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
2343  << " != newNumEntries = " << newNumEntries
2344  << ". Please report this bug to the Tpetra developers.");
2345  }
2346  }
2347 
2348 
// Look up the local column indices in `indices` within the row
// described by `rowInfo`, and return the count reported by
// Details::findCrsIndices.
//
// Throws std::runtime_error unless the graph is StaticProfile.
//
// Locally indexed graph: searches k_lclInds1D_ directly.
// Globally indexed graph: searches k_gblInds1D_, converting each
//   input index with the column Map's getGlobalElement; returns the
//   invalid size_t value if the column Map is null.
// Neither: returns 0.
//
// `fun` is passed through to Details::findCrsIndices.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / first parameter (between 2350 and 2353) are
// absent here.  The body only reads `rowInfo.localRow`; presumably
// the first parameter is `const RowInfo& rowInfo` -- confirm against
// the real source file.
2349  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2350  size_t
2353  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2354  std::function<void(const size_t, const size_t, const size_t)> fun) const
2355  {
2356  const char tfecfFuncName[] = "findLocalIndices: ";
2357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2358  this->getProfileType() != StaticProfile,
2359  std::runtime_error,
2360  "findLocalIndices requires the graph have StaticProfile");
2361 
2362  using LO = LocalOrdinal;
2363  using Kokkos::View;
2364  using Kokkos::MemoryUnmanaged;
// Wrap the caller's array in an unmanaged View (no copy).
2365  using inp_view_type = View<const LO*, execution_space, MemoryUnmanaged>;
2366  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2367 
2368  size_t numFound = 0;
2369  LO lclRow = rowInfo.localRow;
2370  if (this->isLocallyIndexed())
2371  {
2372  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_,
2373  this->k_lclInds1D_, inputInds, fun);
2374  }
2375  else if (this->isGloballyIndexed())
2376  {
// Without a column Map the local input indices cannot be converted
// to global ones.
2377  if (this->colMap_.is_null())
2378  return Teuchos::OrdinalTraits<size_t>::invalid();
2379  const auto& colMap = *(this->colMap_);
2380  auto map = [&](LO const lclInd){return colMap.getGlobalElement(lclInd);};
2381  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_,
2382  this->k_gblInds1D_, inputInds, map, fun);
2383  }
2384  return numFound;
2385  }
2386 
2387 
2388  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2389  size_t
2391  findGlobalIndices(const RowInfo& rowInfo,
2392  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2393  std::function<void(const size_t, const size_t, const size_t)> fun) const
2394  {
2395  const char tfecfFuncName[] = "findGlobalIndices: ";
2396  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2397  this->getProfileType() != StaticProfile,
2398  std::runtime_error,
2399  "findLocalIndices requires the graph have StaticProfile");
2400 
2401  using GO = GlobalOrdinal;
2402  using Kokkos::View;
2403  using Kokkos::MemoryUnmanaged;
2404  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
2405 
2406  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2407  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2408 
2409  size_t numFound = 0;
2410  LocalOrdinal lclRow = rowInfo.localRow;
2411  if (this->isLocallyIndexed())
2412  {
2413  if (this->colMap_.is_null())
2414  return invalidCount;
2415  const auto& colMap = *(this->colMap_);
2416  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
2417  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_,
2418  this->k_lclInds1D_, inputInds, map, fun);
2419  }
2420  else if (this->isGloballyIndexed())
2421  {
2422  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_,
2423  this->k_gblInds1D_, inputInds, fun);
2424  }
2425  return numFound;
2426  }
2427 
2428 
// Sort and/or merge (remove duplicate) local column indices in the
// row described by `rowInfo`, and return the number of duplicate
// entries removed.
//
// sorted [in] If false, the row's first rowInfo.numEntries indices
//   are sorted with std::sort; if true they are left in place.
// merged [in] If false, duplicates are removed with std::unique and
//   k_numRowEntries_(rowInfo.localRow) is set to the new count; if
//   true nothing is removed and 0 is returned.
//
// Returns 0 without touching the row when rowInfo.numEntries is zero
// or the invalid size_t value.
//
// std::unique only removes _adjacent_ duplicates, so passing
// sorted == true with merged == false relies on the row already
// being sorted.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / first parameter (between 2430 and 2433) are
// absent here, so this function's name is not visible in this
// listing.  The body only reads `rowInfo`; presumably the first
// parameter is `const RowInfo& rowInfo` -- confirm against the real
// source file.
2429  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2430  size_t
2433  const bool sorted,
2434  const bool merged)
2435  {
2436  const size_t origNumEnt = rowInfo.numEntries;
2437  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2438  origNumEnt != 0) {
2439  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2440 
2441  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2442  if (! sorted) {
2443  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2444  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2445  }
2446 
2447  if (! merged) {
2448  LocalOrdinal* const beg = lclColIndsRaw;
2449  LocalOrdinal* const end = beg + rowInfo.numEntries;
2450  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2451  LocalOrdinal* const newend = std::unique (beg, end);
2452  const size_t newNumEnt = newend - beg;
2453 
2454  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2455  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2456  return origNumEnt - newNumEnt; // the number of duplicates in the row
2457  }
2458  else {
2459  return static_cast<size_t> (0); // assume no duplicates
2460  }
2461  }
2462  else {
2463  return static_cast<size_t> (0); // no entries in the row
2464  }
2465  }
2466 
2467 
2468  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2469  void
2471  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2472  const Teuchos::RCP<const map_type>& rangeMap)
2473  {
2474  // simple pointer comparison for equality
2475  if (domainMap_ != domainMap) {
2476  domainMap_ = domainMap;
2477  importer_ = Teuchos::null;
2478  }
2479  if (rangeMap_ != rangeMap) {
2480  rangeMap_ = rangeMap;
2481  exporter_ = Teuchos::null;
2482  }
2483  }
2484 
2485 
// Mark the graph's global constants as not computed: set
// globalNumEntries_, globalNumDiags_ and globalMaxNumRowEntries_ to
// the invalid global_size_t value, and clear haveGlobalConstants_.
//
// NOTE(review): the listing lines that carry the class qualifier and
// the function name / parameter list (between 2487 and 2490) are
// absent here, so this function's name and parameters are not
// visible in this listing -- confirm against the real source file.
2486  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2487  void
2490  {
2491  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2492  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2493  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2494  haveGlobalConstants_ = false;
2495  }
2496 
2497 
2498  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2499  void
2502  {
2503  const bool debug = ::Tpetra::Details::Behavior::debug ();
2504  if (debug) {
2505  const char tfecfFuncName[] = "checkInternalState: ";
2506  const char suffix[] = " Please report this bug to the Tpetra developers.";
2507 
2508  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2509  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2510  // check the internal state of this data structure
2511  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2512  // always remains in a valid state
2513 
2514  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2515  (this->rowMap_.is_null (), std::logic_error,
2516  "Row Map is null." << suffix);
2517  // This may access the row Map, so we need to check first (above)
2518  // whether the row Map is null.
2519  const LocalOrdinal lclNumRows =
2520  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2521 
2522  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2523  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2524  "Graph cannot be both fill active and fill complete." << suffix);
2525  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2526  (this->isFillComplete () &&
2527  (this->colMap_.is_null () ||
2528  this->rangeMap_.is_null () ||
2529  this->domainMap_.is_null ()),
2530  std::logic_error,
2531  "Graph is full complete, but at least one of {column, range, domain} "
2532  "Map is null." << suffix);
2533  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2534  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2535  std::logic_error, "Storage is optimized, but indices are not "
2536  "allocated, not even trivially." << suffix);
2537  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2538  (this->indicesAreAllocated_ &&
2539  (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED ||
2540  this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) &&
2541  this->pftype_ != StaticProfile, std::logic_error,
2542  "Graph claims to have allocated indices and 1-D storage "
2543  "(either packed or unpacked), but also claims to be DynamicProfile.");
2544  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2545  (this->indicesAreAllocated_ &&
2546  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2547  this->pftype_ == StaticProfile, std::logic_error,
2548  "Graph claims to have allocated indices and 2-D storage, "
2549  "but also claims to be StaticProfile.");
2550  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2551  (this->indicesAreAllocated_ &&
2552  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2553  this->isLocallyIndexed () &&
2554  static_cast<LocalOrdinal> (this->lclInds2D_.size ()) != lclNumRows,
2555  std::logic_error,
2556  "Graph claims to have allocated indices, be locally indexed, and have "
2557  "2-D storage, but lclInds2D_.size() = " << this->lclInds2D_.size ()
2558  << " != getNodeNumRows() = " << lclNumRows << ".");
2559  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2560  (this->indicesAreAllocated_ &&
2561  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2562  this->isGloballyIndexed () &&
2563  static_cast<LocalOrdinal> (this->gblInds2D_.size ()) != lclNumRows,
2564  std::logic_error,
2565  "Graph claims to have allocated indices, be globally indexed, and have "
2566  "2-D storage, but gblInds2D_.size() = " << this->gblInds2D_.size ()
2567  << " != getNodeNumRows() = " << lclNumRows << ".");
2568 
2569  size_t nodeAllocSize = 0;
2570  try {
2571  nodeAllocSize = this->getNodeAllocationSize ();
2572  }
2573  catch (std::logic_error& e) {
2574  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2575  (true, std::runtime_error, "getNodeAllocationSize threw "
2576  "std::logic_error: " << e.what ());
2577  }
2578  catch (std::exception& e) {
2579  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2580  (true, std::runtime_error, "getNodeAllocationSize threw an "
2581  "std::exception: " << e.what ());
2582  }
2583  catch (...) {
2584  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2585  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2586  "not a subclass of std::exception.");
2587  }
2588 
2589  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2590  (this->isStorageOptimized () &&
2591  nodeAllocSize != this->getNodeNumEntries (),
2592  std::logic_error, "Storage is optimized, but "
2593  "this->getNodeAllocationSize() = " << nodeAllocSize
2594  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2595  << "." << suffix);
2596  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2597  (! this->haveGlobalConstants_ &&
2598  (this->globalNumEntries_ != GSTI ||
2599  this->globalMaxNumRowEntries_ != GSTI),
2600  std::logic_error, "Graph claims not to have global constants, but "
2601  "some of the global constants are not marked as invalid." << suffix);
2602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2603  (this->haveGlobalConstants_ &&
2604  (this->globalNumEntries_ == GSTI ||
2605  this->globalMaxNumRowEntries_ == GSTI),
2606  std::logic_error, "Graph claims to have global constants, but "
2607  "some of them are marked as invalid." << suffix);
2608  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2609  (this->haveGlobalConstants_ &&
2610  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2611  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2612  std::logic_error, "Graph claims to have global constants, and "
2613  "all of the values of the global constants are valid, but "
2614  "some of the local constants are greater than "
2615  "their corresponding global constants." << suffix);
2616  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2617  (this->indicesAreAllocated () &&
2618  (this->numAllocForAllRows_ != 0 ||
2619  this->k_numAllocPerRow_.extent (0) != 0),
2620  std::logic_error, "The graph claims that its indices are allocated, but "
2621  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2622  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2623  "the graph is supposed to release its \"allocation specifications\" "
2624  "when it allocates its indices." << suffix);
2625  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2626  (this->isStorageOptimized () && this->pftype_ != StaticProfile,
2627  std::logic_error,
2628  "Storage is optimized, but graph is not StaticProfile." << suffix);
2629  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2630  (this->isGloballyIndexed () &&
2631  this->k_rowPtrs_.extent (0) != 0 &&
2632  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2633  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_gblInds1D_.extent (0))),
2634  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2635  "the graph is globally indexed, then "
2636  "k_rowPtrs_ must have N+1 rows, and "
2637  "k_rowPtrs_(N) must equal k_gblInds1D_.extent(0)." << suffix);
2638  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2639  (this->isLocallyIndexed () &&
2640  this->k_rowPtrs_.extent (0) != 0 &&
2641  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2642  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2643  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2644  "the graph is locally indexed, then "
2645  "k_rowPtrs_ must have N+1 rows, and "
2646  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2647 
2648  if (this->pftype_ != StaticProfile) {
2649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2650  (this->indicesAreAllocated () &&
2651  this->getNodeNumRows () > 0 &&
2652  this->lclInds2D_.is_null () &&
2653  this->gblInds2D_.is_null (),
2654  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2655  "the calling process has nonzero rows, but 2-D column index storage "
2656  "(whether local or global) is not present." << suffix);
2657  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2658  (this->indicesAreAllocated () &&
2659  this->getNodeNumRows () > 0 &&
2660  this->k_numRowEntries_.extent (0) == 0,
2661  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2662  "the calling process has nonzero rows, but k_numRowEntries_ is not "
2663  "present." << suffix);
2664  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2665  (this->k_lclInds1D_.extent (0) != 0 ||
2666  this->k_gblInds1D_.extent (0) != 0,
2667  std::logic_error, "Graph has DynamicProfile, but "
2668  "1-D allocations are present." << suffix);
2669  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2670  (this->k_rowPtrs_.extent (0) != 0,
2671  std::logic_error, "Graph has DynamicProfile, but "
2672  "row offsets are present." << suffix);
2673  }
2674  else if (this->pftype_ == StaticProfile) {
2675  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2676  (this->indicesAreAllocated () &&
2677  nodeAllocSize > 0 &&
2678  this->k_lclInds1D_.extent (0) == 0 &&
2679  this->k_gblInds1D_.extent (0) == 0,
2680  std::logic_error, "Graph has StaticProfile and is allocated "
2681  "nonnontrivally, but 1-D allocations are not present." << suffix);
2682  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2683  (this->lclInds2D_ != Teuchos::null || this->gblInds2D_ != Teuchos::null,
2684  std::logic_error, "Graph has StaticProfile, but 2-D allocations are "
2685  "present." << suffix);
2686  }
2687 
2688  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2689  (! this->indicesAreAllocated () &&
2690  ((this->k_rowPtrs_.extent (0) != 0 ||
2691  this->k_numRowEntries_.extent (0) != 0) ||
2692  this->k_lclInds1D_.extent (0) != 0 ||
2693  this->lclInds2D_ != Teuchos::null ||
2694  this->k_gblInds1D_.extent (0) != 0 ||
2695  this->gblInds2D_ != Teuchos::null),
2696  std::logic_error, "If indices are not allocated, "
2697  "then none of the buffers should be." << suffix);
2698  // indices may be local or global only if they are allocated
2699  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2700  // indicesAreGlobal_)
2701  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2702  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2703  ! this->indicesAreAllocated_,
2704  std::logic_error, "Indices may be local or global only if they are "
2705  "allocated." << suffix);
2706  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2707  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2708  std::logic_error, "Indices may not be both local and global." << suffix);
2709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2710  (this->indicesAreLocal_ &&
2711  (this->k_gblInds1D_.extent (0) != 0 || ! this->gblInds2D_.is_null ()),
2712  std::logic_error, "Indices are local, but either "
2713  "k_gblInds1D_.extent(0) (= "
2714  << this->k_gblInds1D_.extent (0) << ") != 0, or "
2715  "gblInds2D_ is not null. In other words, if indices are local, "
2716  "then global allocations should not be present." << suffix);
2717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2718  (this->indicesAreGlobal_ &&
2719  (this->k_lclInds1D_.extent (0) != 0 ||
2720  ! this->lclInds2D_.is_null ()),
2721  std::logic_error, "Indices are global, but either "
2722  "k_lclInds1D_.extent(0) (= "
2723  << this->k_lclInds1D_.extent (0) << ") != 0, or "
2724  "lclInds2D_ is not null. In other words, if indices are global, "
2725  "then local allocations should not be present." << suffix);
2726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2727  (this->indicesAreLocal_ &&
2728  nodeAllocSize > 0 &&
2729  this->k_lclInds1D_.extent (0) == 0 &&
2730  this->getNodeNumRows () > 0 &&
2731  this->lclInds2D_.is_null (),
2732  std::logic_error, "Indices are local, getNodeAllocationSize() = "
2733  << nodeAllocSize << " > 0, k_lclInds1D_.extent(0) = 0, "
2734  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2735  "lclInds2D_ is null." << suffix);
2736  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2737  (this->indicesAreGlobal_ &&
2738  nodeAllocSize > 0 &&
2739  this->k_gblInds1D_.extent (0) == 0 &&
2740  this->getNodeNumRows () > 0 &&
2741  this->gblInds2D_.is_null (),
2742  std::logic_error, "Indices are global, getNodeAllocationSize() = "
2743  << nodeAllocSize << " > 0, k_gblInds1D_.extent(0) = 0, "
2744  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2745  "gblInds2D_ is null." << suffix);
2746  // check the actual allocations
2747  if (this->indicesAreAllocated () &&
2748  this->pftype_ == StaticProfile &&
2749  this->k_rowPtrs_.extent (0) != 0) {
2750  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2751  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2752  this->getNodeNumRows () + 1,
2753  std::logic_error, "Graph is StaticProfile, indices are allocated, and "
2754  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2755  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2756  << (this->getNodeNumRows () + 1) << "." << suffix);
2757  const size_t actualNumAllocated =
2758  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2759  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2760  (this->isLocallyIndexed () &&
2761  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2762  std::logic_error, "Graph is StaticProfile and locally indexed, "
2763  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2764  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2765  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2767  (this->isGloballyIndexed () &&
2768  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2769  std::logic_error, "Graph is StaticProfile and globally indexed, "
2770  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2771  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2772  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2773  }
2774  }
2775  }
2776 
2777 
2778  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2779  size_t
2781  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2782  {
2783  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2784  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2785  return Teuchos::OrdinalTraits<size_t>::invalid ();
2786  }
2787  else {
2788  return rowInfo.numEntries;
2789  }
2790  }
2791 
2792 
2793  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2794  size_t
2796  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2797  {
2798  const RowInfo rowInfo = this->getRowInfo (localRow);
2799  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2800  return Teuchos::OrdinalTraits<size_t>::invalid ();
2801  }
2802  else {
2803  return rowInfo.numEntries;
2804  }
2805  }
2806 
2807 
2808  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2809  size_t
2811  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2812  {
2813  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2814  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2815  return Teuchos::OrdinalTraits<size_t>::invalid ();
2816  }
2817  else {
2818  return rowInfo.allocSize;
2819  }
2820  }
2821 
2822 
2823  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2824  size_t
2826  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2827  {
2828  const RowInfo rowInfo = this->getRowInfo (localRow);
2829  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2830  return Teuchos::OrdinalTraits<size_t>::invalid ();
2831  }
2832  else {
2833  return rowInfo.allocSize;
2834  }
2835  }
2836 
2837 
// Return the row offsets (k_rowPtrs_) as a host-accessible
// Teuchos::ArrayRCP<const size_t>.
//
// NOTE(review): the declarator lines of this definition (class
// qualifier and function name) are absent between the return type
// and the opening brace; the "prefix" string below indicates this is
// Tpetra::CrsGraph::getNodeRowPtrs -- confirm against the header.
//
// Returns a null ArrayRCP if k_rowPtrs_ has zero extent.  In debug
// mode (Details::Behavior::debug()), throws std::logic_error if any
// intermediate view or the result is unexpectedly null or mis-sized.
2838  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2839  Teuchos::ArrayRCP<const size_t>
2842  {
2843  using Kokkos::ViewAllocateWithoutInitializing;
2844  using Kokkos::create_mirror_view;
2845  using Teuchos::ArrayRCP;
2846  typedef typename local_graph_type::row_map_type row_map_type;
2847  typedef typename row_map_type::non_const_value_type row_offset_type;
2848  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2849  const char suffix[] = " Please report this bug to the Tpetra developers.";
2850  const bool debug = ::Tpetra::Details::Behavior::debug ();
2851 
2852  const size_t size = k_rowPtrs_.extent (0);
// Compile-time flag: is the stored offset type already size_t?
2853  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2854 
// No row offsets stored: hand back a null ArrayRCP.
2855  if (size == 0) {
2856  return ArrayRCP<const size_t> ();
2857  }
2858 
// Exactly one of these two is filled in below, depending on "same".
2859  ArrayRCP<const row_offset_type> ptr_rot;
2860  ArrayRCP<const size_t> ptr_st;
2861  if (same) { // size_t == row_offset_type
2862  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2863  // of create_mirror_view might actually be a new allocation.
2864  // This helps with debugging when there are two memory spaces.
2865  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2866  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2867  if (debug) {
2868  TEUCHOS_TEST_FOR_EXCEPTION
2869  (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2870  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2871  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2872  << k_rowPtrs_.extent (0) << ".");
2873  TEUCHOS_TEST_FOR_EXCEPTION
2874  (same && size != 0 && k_rowPtrs_.data () == nullptr, std::logic_error,
2875  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2876  << size << " != 0, but k_rowPtrs_.data() == nullptr." << suffix);
2877  TEUCHOS_TEST_FOR_EXCEPTION
2878  (same && size != 0 && ptr_h.data () == nullptr, std::logic_error,
2879  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2880  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2881  "== nullptr." << suffix);
2882  }
// Wrap the host mirror in an ArrayRCP.
2883  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2884  }
2885  else { // size_t != row_offset_type
// Convert the offsets into a freshly allocated size_t view via
// Details::copyOffsets, then copy that view to a host mirror.
2886  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2887  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2888  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2889  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2890  Kokkos::deep_copy (ptr_h, ptr_d);
2891  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2892  }
2893  if (debug) {
2894  TEUCHOS_TEST_FOR_EXCEPTION
2895  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2896  prefix << "size_t == row_offset_type and size = " << size
2897  << " != 0, but ptr_rot is null." << suffix);
2898  TEUCHOS_TEST_FOR_EXCEPTION
2899  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2900  prefix << "size_t != row_offset_type and size = " << size
2901  << " != 0, but ptr_st is null." << suffix);
2902  }
2903 
2904  // If size_t == row_offset_type, return a persisting host view of
2905  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
// if_c::select picks ptr_rot or ptr_st at compile time based on
// "same", so the initialization below type-checks in both cases.
2906  ArrayRCP<const size_t> retval =
2907  Kokkos::Impl::if_c<same,
2908  ArrayRCP<const row_offset_type>,
2909  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2910  if (debug) {
2911  TEUCHOS_TEST_FOR_EXCEPTION
2912  (size != 0 && retval.is_null (), std::logic_error,
2913  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2914  }
2915  return retval;
2916  }
2917 
2918 
// Return the 1-D local column index storage (k_lclInds1D_) wrapped
// as a Teuchos::ArrayRCP<const LocalOrdinal> via
// Kokkos::Compat::persistingView.
//
// NOTE(review): the declarator lines of this definition (class
// qualifier and function name) are absent between the return type
// and the opening brace; presumably this is
// CrsGraph::getNodePackedIndices() const -- confirm against the header.
2919  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2920  Teuchos::ArrayRCP<const LocalOrdinal>
2923  {
2924  return Kokkos::Compat::persistingView (k_lclInds1D_);
2925  }
2926 
2927 
2928  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2929  void
2931  getLocalRowCopy (LocalOrdinal localRow,
2932  const Teuchos::ArrayView<LocalOrdinal>&indices,
2933  size_t& numEntries) const
2934  {
2935  using Teuchos::ArrayView;
2936  typedef LocalOrdinal LO;
2937  typedef GlobalOrdinal GO;
2938  const char tfecfFuncName[] = "getLocalRowCopy: ";
2939 
2940  TEUCHOS_TEST_FOR_EXCEPTION(
2941  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2942  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2943  "does not have a column Map yet. That means we don't have local indices "
2944  "for columns yet, so it doesn't make sense to call this method. If the "
2945  "graph doesn't have a column Map yet, you should call fillComplete on "
2946  "it first.");
2947 
2948  // This does the right thing (reports an empty row) if the input
2949  // row is invalid.
2950  const RowInfo rowinfo = this->getRowInfo (localRow);
2951  // No side effects on error.
2952  const size_t theNumEntries = rowinfo.numEntries;
2953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2954  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2955  "Specified storage (size==" << indices.size () << ") does not suffice "
2956  "to hold all " << theNumEntries << " entry/ies for this row.");
2957  numEntries = theNumEntries;
2958 
2959  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2960  if (isLocallyIndexed ()) {
2961  ArrayView<const LO> lview = getLocalView (rowinfo);
2962  for (size_t j = 0; j < theNumEntries; ++j) {
2963  indices[j] = lview[j];
2964  }
2965  }
2966  else if (isGloballyIndexed ()) {
2967  ArrayView<const GO> gview = getGlobalView (rowinfo);
2968  for (size_t j = 0; j < theNumEntries; ++j) {
2969  indices[j] = colMap_->getLocalElement (gview[j]);
2970  }
2971  }
2972  }
2973  }
2974 
2975 
2976  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2977  void
2979  getGlobalRowCopy (GlobalOrdinal globalRow,
2980  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2981  size_t& numEntries) const
2982  {
2983  using Teuchos::ArrayView;
2984  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2985 
2986  // This does the right thing (reports an empty row) if the input
2987  // row is invalid.
2988  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2989  const size_t theNumEntries = rowinfo.numEntries;
2990  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2991  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2992  "Specified storage (size==" << indices.size () << ") does not suffice "
2993  "to hold all " << theNumEntries << " entry/ies for this row.");
2994  numEntries = theNumEntries; // first side effect
2995 
2996  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2997  if (isLocallyIndexed ()) {
2998  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2999  for (size_t j = 0; j < theNumEntries; ++j) {
3000  indices[j] = colMap_->getGlobalElement (lview[j]);
3001  }
3002  }
3003  else if (isGloballyIndexed ()) {
3004  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
3005  for (size_t j = 0; j < theNumEntries; ++j) {
3006  indices[j] = gview[j];
3007  }
3008  }
3009  }
3010  }
3011 
3012 
3013  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3014  void
3016  getLocalRowView (const LocalOrdinal localRow,
3017  Teuchos::ArrayView<const LocalOrdinal>& indices) const
3018  {
3019  const char tfecfFuncName[] = "getLocalRowView: ";
3020 #ifdef HAVE_TPETRA_DEBUG
3021  constexpr bool debug = true;
3022 #else
3023  constexpr bool debug = false;
3024 #endif // HAVE_TPETRA_DEBUG
3025 
3026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3027  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
3028  "currently stored as global indices, so we cannot return a view with "
3029  "local column indices, whether or not the graph has a column Map. If "
3030  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
3031 
3032  // This does the right thing (reports an empty row) if the input
3033  // row is invalid.
3034  const RowInfo rowInfo = getRowInfo (localRow);
3035  indices = Teuchos::null;
3036  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3037  rowInfo.numEntries > 0) {
3038  indices = this->getLocalView (rowInfo);
3039  // getLocalView returns a view of the _entire_ row, including
3040  // any extra space at the end (which 1-D unpacked storage
3041  // might have, for example). That's why we have to take a
3042  // subview of the returned view.
3043  indices = indices (0, rowInfo.numEntries);
3044  }
3045 
3046  if (debug) {
3047  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3048  (static_cast<size_t> (indices.size ()) !=
3049  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
3050  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
3051  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
3052  ". Please report this bug to the Tpetra developers.");
3053  }
3054  }
3055 
3056 
3057  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3058  void
3060  getGlobalRowView (const GlobalOrdinal globalRow,
3061  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
3062  {
3063  const char tfecfFuncName[] = "getGlobalRowView: ";
3064 #ifdef HAVE_TPETRA_DEBUG
3065  constexpr bool debug = true;
3066 #else
3067  constexpr bool debug = false;
3068 #endif // HAVE_TPETRA_DEBUG
3069 
3070  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3071  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
3072  "currently stored as local indices, so we cannot return a view with "
3073  "global column indices. Use getGlobalRowCopy() instead.");
3074 
3075  // This does the right thing (reports an empty row) if the input
3076  // row is invalid.
3077  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
3078  indices = Teuchos::null;
3079  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3080  rowInfo.numEntries > 0) {
3081  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
3082  }
3083 
3084  if (debug) {
3085  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3086  (static_cast<size_t> (indices.size ()) !=
3087  getNumEntriesInGlobalRow (globalRow),
3088  std::logic_error, "indices.size() = " << indices.size ()
3089  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
3090  << getNumEntriesInGlobalRow (globalRow)
3091  << ". Please report this bug to the Tpetra developers.");
3092  }
3093  }
3094 
3095 
3096  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3097  void
3099  insertLocalIndices (const LocalOrdinal localRow,
3100  const Teuchos::ArrayView<const LocalOrdinal>& indices)
3101  {
3102  const char tfecfFuncName[] = "insertLocalIndices: ";
3103 
3104  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3105  (! isFillActive (), std::runtime_error, "Fill must be active.");
3106  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3107  (isGloballyIndexed (), std::runtime_error,
3108  "Graph indices are global; use insertGlobalIndices().");
3109  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3110  (! hasColMap (), std::runtime_error,
3111  "Cannot insert local indices without a column Map.");
3112  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3113  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
3114  "Local row index " << localRow << " is not in the row Map "
3115  "on the calling process.");
3116  if (! indicesAreAllocated ()) {
3117  allocateIndices (LocalIndices);
3118  }
3119 
3120 #ifdef HAVE_TPETRA_DEBUG
3121  constexpr bool debug = true;
3122 #else
3123  constexpr bool debug = false;
3124 #endif // HAVE_TPETRA_DEBUG
3125 
3126  if (debug) {
3127  // In debug mode, if the graph has a column Map, test whether any
3128  // of the given column indices are not in the column Map. Keep
3129  // track of the invalid column indices so we can tell the user
3130  // about them.
3131  if (hasColMap ()) {
3132  using Teuchos::Array;
3133  using Teuchos::toString;
3134  using std::endl;
3135  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
3136 
3137  const map_type& colMap = *colMap_;
3138  Array<LocalOrdinal> badColInds;
3139  bool allInColMap = true;
3140  for (size_type k = 0; k < indices.size (); ++k) {
3141  if (! colMap.isNodeLocalElement (indices[k])) {
3142  allInColMap = false;
3143  badColInds.push_back (indices[k]);
3144  }
3145  }
3146  if (! allInColMap) {
3147  std::ostringstream os;
3148  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
3149  "entries in owned row " << localRow << ", at the following column "
3150  "indices: " << toString (indices) << "." << endl;
3151  os << "Of those, the following indices are not in the column Map on "
3152  "this process: " << toString (badColInds) << "." << endl << "Since "
3153  "the graph has a column Map already, it is invalid to insert entries "
3154  "at those locations.";
3155  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
3156  }
3157  }
3158  }
3159 
3160  insertLocalIndicesImpl (localRow, indices);
3161 
3162  if (debug) {
3163  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3164  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
3165  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
3166  "! isLocallyIndexed() is true. Please report this bug to the "
3167  "Tpetra developers.");
3168  }
3169  }
3170 
3171  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3172  void
3174  insertLocalIndices (const LocalOrdinal localRow,
3175  const LocalOrdinal numEnt,
3176  const LocalOrdinal inds[])
3177  {
3178  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
3179  this->insertLocalIndices (localRow, indsT);
3180  }
3181 
3182 
3183  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3184  void
3186  insertGlobalIndices (const GlobalOrdinal gblRow,
3187  const LocalOrdinal numInputInds,
3188  const GlobalOrdinal inputGblColInds[])
3189  {
3190  typedef LocalOrdinal LO;
3191  const char tfecfFuncName[] = "insertGlobalIndices: ";
3192 #ifdef HAVE_TPETRA_DEBUG
3193  constexpr bool debug = true;
3194 #else
3195  constexpr bool debug = false;
3196 #endif // HAVE_TPETRA_DEBUG
3197 
3198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3199  (this->isLocallyIndexed (), std::runtime_error,
3200  "graph indices are local; use insertLocalIndices().");
3201  // This can't really be satisfied for now, because if we are
3202  // fillComplete(), then we are local. In the future, this may
3203  // change. However, the rule that modification require active
3204  // fill will not change.
3205  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3206  (! this->isFillActive (), std::runtime_error,
3207  "You are not allowed to call this method if fill is not active. "
3208  "If fillComplete has been called, you must first call resumeFill "
3209  "before you may insert indices.");
3210  if (! this->indicesAreAllocated ()) {
3211  this->allocateIndices (GlobalIndices);
3212  }
3213  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
3214  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3215  if (debug) {
3216  if (this->hasColMap ()) {
3217  using std::endl;
3218  const map_type& colMap = * (this->colMap_);
3219  // In a debug build, keep track of the nonowned ("bad") column
3220  // indices, so that we can display them in the exception
3221  // message. In a release build, just ditch the loop early if
3222  // we encounter a nonowned column index.
3223  std::vector<GlobalOrdinal> badColInds;
3224  bool allInColMap = true;
3225  for (LO k = 0; k < numInputInds; ++k) {
3226  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
3227  allInColMap = false;
3228  badColInds.push_back (inputGblColInds[k]);
3229  }
3230  }
3231  if (! allInColMap) {
3232  std::ostringstream os;
3233  os << "You attempted to insert entries in owned row " << gblRow
3234  << ", at the following column indices: [";
3235  for (LO k = 0; k < numInputInds; ++k) {
3236  os << inputGblColInds[k];
3237  if (k + static_cast<LO> (1) < numInputInds) {
3238  os << ",";
3239  }
3240  }
3241  os << "]." << endl << "Of those, the following indices are not in "
3242  "the column Map on this process: [";
3243  for (size_t k = 0; k < badColInds.size (); ++k) {
3244  os << badColInds[k];
3245  if (k + size_t (1) < badColInds.size ()) {
3246  os << ",";
3247  }
3248  }
3249  os << "]." << endl << "Since the matrix has a column Map already, "
3250  "it is invalid to insert entries at those locations.";
3251  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3252  (true, std::invalid_argument, os.str ());
3253  }
3254  }
3255  } // debug
3256  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
3257  }
3258  else { // a nonlocal row
3259  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
3260  numInputInds);
3261  }
3262  }
3263 
3264 
3265  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3266  void
3268  insertGlobalIndices (const GlobalOrdinal gblRow,
3269  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
3270  {
3271  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
3272  inputGblColInds.getRawPtr ());
3273  }
3274 
3275 
3276  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3277  void
3279  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
3280  const GlobalOrdinal gblColInds[],
3281  const LocalOrdinal numGblColInds)
3282  {
3283  typedef LocalOrdinal LO;
3284  typedef GlobalOrdinal GO;
3285  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
3286 
3287  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3288  (this->isLocallyIndexed (), std::runtime_error,
3289  "Graph indices are local; use insertLocalIndices().");
3290  // This can't really be satisfied for now, because if we are
3291  // fillComplete(), then we are local. In the future, this may
3292  // change. However, the rule that modification require active
3293  // fill will not change.
3294  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3295  (! this->isFillActive (), std::runtime_error,
3296  "You are not allowed to call this method if fill is not active. "
3297  "If fillComplete has been called, you must first call resumeFill "
3298  "before you may insert indices.");
3299  if (! this->indicesAreAllocated ()) {
3300  this->allocateIndices (GlobalIndices);
3301  }
3302 
3303  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
3304  // If we have a column Map, use it to filter the entries.
3305  if (! this->colMap_.is_null ()) {
3306  const map_type& colMap = * (this->colMap_);
3307 
3308  LO curOffset = 0;
3309  while (curOffset < numGblColInds) {
3310  // Find a sequence of input indices that are in the column Map
3311  // on the calling process. Doing a sequence at a time,
3312  // instead of one at a time, amortizes some overhead.
3313  LO endOffset = curOffset;
3314  for ( ; endOffset < numGblColInds; ++endOffset) {
3315  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
3316  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3317  break; // first entry, in current sequence, not in the column Map
3318  }
3319  }
3320  // curOffset, endOffset: half-exclusive range of indices in
3321  // the column Map on the calling process. If endOffset ==
3322  // curOffset, the range is empty.
3323  const LO numIndInSeq = (endOffset - curOffset);
3324  if (numIndInSeq != 0) {
3325  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
3326  numIndInSeq);
3327  }
3328  // Invariant before this line: Either endOffset ==
3329  // numGblColInds, or gblColInds[endOffset] is not in the
3330  // column Map on the calling process.
3331  curOffset = endOffset + 1;
3332  }
3333  }
3334  else {
3335  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
3336  gblColInds_av.size ());
3337  }
3338  }
3339 
3340  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3341  void
3343  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
3344  const GlobalOrdinal gblColInds[],
3345  const LocalOrdinal numGblColInds)
3346  {
3347  // This creates the std::vector if it doesn't exist yet.
3348  // std::map's operator[] does a lookup each time, so it's better
3349  // to pull nonlocals_[grow] out of the loop.
3350  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
3351  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
3352  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
3353  // order to avoid duplicates. globalAssemble() sorts these
3354  // anyway.
3355  nonlocalRow.push_back (gblColInds[k]);
3356  }
3357  }
3358 
3359  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3360  void
3362  removeLocalIndices (LocalOrdinal lrow)
3363  {
3364  const char tfecfFuncName[] = "removeLocalIndices: ";
3365  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3366  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3367  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3368  isStorageOptimized (), std::runtime_error,
3369  "cannot remove indices after optimizeStorage() has been called.");
3370  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3371  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3372  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3373  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3374  "Local row " << lrow << " is not in the row Map on the calling process.");
3375  if (! indicesAreAllocated ()) {
3376  allocateIndices (LocalIndices);
3377  }
3378 
3379  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3380  // all processes?
3381  clearGlobalConstants ();
3382 
3383  if (k_numRowEntries_.extent (0) != 0) {
3384  this->k_numRowEntries_(lrow) = 0;
3385  }
3386 #ifdef HAVE_TPETRA_DEBUG
3387  constexpr bool debug = true;
3388 #else
3389  constexpr bool debug = false;
3390 #endif // HAVE_TPETRA_DEBUG
3391 
3392  if (debug) {
3393  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3394  (getNumEntriesInLocalRow (lrow) != 0 ||
3395  ! indicesAreAllocated () ||
3396  ! isLocallyIndexed (), std::logic_error,
3397  "Violated stated post-conditions. Please contact Tpetra team.");
3398  }
3399  }
3400 
3401 
3402  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3403  void
3405  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3406  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3407  {
3408  const char tfecfFuncName[] = "setAllIndices: ";
3409  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3410  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3411  "The graph must have a column Map before you may call this method.");
3412  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3413  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3414  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3415  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3416  ".");
3417 
3418  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3419  // since the future model will be allocation at construction, not
3420  // lazy allocation on first insert.
3421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3422  ((this->k_lclInds1D_.extent (0) != 0 || this->k_gblInds1D_.extent (0) != 0),
3423  std::runtime_error, "You may not call this method if 1-D data "
3424  "structures are already allocated.");
3425 
3426  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3427  (this->lclInds2D_ != Teuchos::null ||
3428  this->gblInds2D_ != Teuchos::null,
3429  std::runtime_error, "You may not call this method if 2-D data "
3430  "structures are already allocated.");
3431 
3432  indicesAreAllocated_ = true;
3433  indicesAreLocal_ = true;
3434  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3435  k_lclInds1D_ = columnIndices;
3436  k_rowPtrs_ = rowPointers;
3437  // Storage MUST be packed, since the interface doesn't give any
3438  // way to indicate any extra space at the end of each row.
3439  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
3440 
3441  // Build the local graph.
3442  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3443 
3444  // These normally get cleared out at the end of allocateIndices.
3445  // It makes sense to clear them out here, because at the end of
3446  // this method, the graph is allocated on the calling process.
3447  numAllocForAllRows_ = 0;
3448  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3449 
3450  checkInternalState ();
3451  }
3452 
3453 
3454  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3455  void
3457  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3458  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3459  {
3460  using Kokkos::View;
3461  typedef typename local_graph_type::row_map_type row_map_type;
3462  typedef typename row_map_type::array_layout layout_type;
3463  typedef typename row_map_type::non_const_value_type row_offset_type;
3464  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3465  Kokkos::MemoryUnmanaged> input_view_type;
3466  typedef typename row_map_type::non_const_type nc_row_map_type;
3467 
3468  const size_t size = static_cast<size_t> (rowPointers.size ());
3469  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3470  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3471 
3472  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3473 
3474  if (same) { // size_t == row_offset_type
3475  // This compile-time logic ensures that the compiler never sees
3476  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3477  // ...> unless size_t == row_offset_type.
3478  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3479  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3480  nc_row_map_type,
3481  input_view_type>::select (ptr_rot, ptr_decoy),
3482  ptr_in);
3483  }
3484  else { // size_t != row_offset_type
3485  // CudaUvmSpace != HostSpace, so this will be false in that case.
3486  constexpr bool inHostMemory =
3487  std::is_same<typename row_map_type::memory_space,
3488  Kokkos::HostSpace>::value;
3489  if (inHostMemory) {
3490  // Copy (with cast from size_t to row_offset_type, with bounds
3491  // checking if necessary) to ptr_rot.
3492  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3493  }
3494  else { // Copy input row offsets to device first.
3495  //
3496  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3497  // execution space would avoid the double copy.
3498  //
3499  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3500  Kokkos::deep_copy (ptr_st, ptr_in);
3501  // Copy on device (casting from size_t to row_offset_type,
3502  // with bounds checking if necessary) to ptr_rot. This
3503  // executes in the output View's execution space, which is the
3504  // same as execution_space.
3505  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3506  }
3507  }
3508 
3509  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3510  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3511  setAllIndices (ptr_rot, k_ind);
3512  }
3513 
3514 
3515  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3516  void
3518  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3519  size_t& boundForAllLocalRows,
3520  bool& boundSameForAllLocalRows) const
3521  {
3522  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3523  const char suffix[] = " Please report this bug to the Tpetra developers.";
3524 
3525  // The three output arguments. We assign them to the actual
3526  // output arguments at the end, in order to implement
3527  // transactional semantics.
3528  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3529  size_t numEntriesForAll = 0;
3530  bool allRowsSame = true;
3531 
3532  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3533 
3534  if (this->indicesAreAllocated ()) {
3535  if (this->isStorageOptimized ()) {
3536  // left with the case that we have optimized storage. in this
3537  // case, we have to construct a list of row sizes.
3538  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3539  (this->getProfileType () != StaticProfile, std::logic_error,
3540  "The graph is not StaticProfile, but storage appears to be optimized."
3541  << suffix);
3542  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3543  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3544  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3545  << " on the calling process, but the k_rowPtrs_ array has zero entries."
3546  << suffix);
3547  Teuchos::ArrayRCP<size_t> numEnt;
3548  if (numRows != 0) {
3549  numEnt = Teuchos::arcp<size_t> (numRows);
3550  }
3551 
3552  // We have to iterate through the row offsets anyway, so we
3553  // might as well check whether all rows' bounds are the same.
3554  bool allRowsReallySame = false;
3555  for (ptrdiff_t i = 0; i < numRows; ++i) {
3556  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3557  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3558  allRowsReallySame = false;
3559  }
3560  }
3561  if (allRowsReallySame) {
3562  if (numRows == 0) {
3563  numEntriesForAll = 0;
3564  } else {
3565  numEntriesForAll = numEnt[1] - numEnt[0];
3566  }
3567  allRowsSame = true;
3568  }
3569  else {
3570  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3571  allRowsSame = false; // conservatively; we don't check the array
3572  }
3573  }
3574  else if (k_numRowEntries_.extent (0) != 0) {
3575  // This is a shallow copy; the ArrayRCP wraps the View in a
3576  // custom destructor, which ensures correct deallocation if
3577  // that is the only reference to the View. Furthermore, this
3578  // View is a host View, so this doesn't assume UVM.
3579  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3580  allRowsSame = false; // conservatively; we don't check the array
3581  }
3582  else {
3583  numEntriesForAll = 0;
3584  allRowsSame = true;
3585  }
3586  }
3587  else { // indices not allocated
3588  if (k_numAllocPerRow_.extent (0) != 0) {
3589  // This is a shallow copy; the ArrayRCP wraps the View in a
3590  // custom destructor, which ensures correct deallocation if
3591  // that is the only reference to the View. Furthermore, this
3592  // View is a host View, so this doesn't assume UVM.
3593  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3594  allRowsSame = false; // conservatively; we don't check the array
3595  }
3596  else {
3597  numEntriesForAll = numAllocForAllRows_;
3598  allRowsSame = true;
3599  }
3600  }
3601 
3602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3603  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3604  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3605  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3606  "size " << numEntriesPerRow.size () << "." << suffix);
3607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3608  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3609  "numEntriesForAll and allRowsSame are not consistent. The former "
3610  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3611  << suffix);
3612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3613  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3614  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3615  "nonzero length " << numEntriesForAll << ", but the latter is true."
3616  << suffix);
3617 
3618  boundPerLocalRow = numEntriesPerRow;
3619  boundForAllLocalRows = numEntriesForAll;
3620  boundSameForAllLocalRows = allRowsSame;
3621  }
3622 
3623 
// Communicate the column indices that were inserted into rows which
// the calling process does not own (stashed in nonlocals_) into this
// graph.  The stashed rows are sorted and deduplicated, packed into a
// temporary graph, and then sent with an Export (row Map is one to
// one) or an Export followed by an Import (row Map is not one to
// one).  Throws std::runtime_error if fill is not active.  Performs
// all-reduces over the graph's communicator, and returns early if no
// process has nonlocal rows.  On normal completion, nonlocals_ is
// empty.
// NOTE(review): the declarator lines (class qualifier and method
// name) appear to be missing from this listing between "void" and
// "{"; the name "globalAssemble" is taken from tfecfFuncName below.
3624  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3625  void
3628  {
3629  using Teuchos::Comm;
3630  using Teuchos::outArg;
3631  using Teuchos::RCP;
3632  using Teuchos::rcp;
3633  using Teuchos::REDUCE_MAX;
3634  using Teuchos::REDUCE_MIN;
3635  using Teuchos::reduceAll;
3636  typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type;
3637  typedef LocalOrdinal LO;
3638  typedef GlobalOrdinal GO;
3639  typedef typename Teuchos::Array<GO>::size_type size_type;
3640  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3641 
3642  RCP<const Comm<int> > comm = getComm ();
3643 
3644  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3645  (! isFillActive (), std::runtime_error, "Fill must be active before "
3646  "you may call this method.");
3647 
3648  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3649 
3650  // If no processes have nonlocal rows, then we don't have to do
3651  // anything. Checking this is probably cheaper than constructing
3652  // the Map of nonlocal rows (see below) and noticing that it has
3653  // zero global entries.
3654  {
3655  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3656  int someoneHasNonlocalRows = 0;
3657  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3658  outArg (someoneHasNonlocalRows));
3659  if (someoneHasNonlocalRows == 0) {
3660  return; // no process has nonlocal rows, so nothing to do
3661  }
3662  }
3663 
3664  // 1. Create a list of the "nonlocal" rows on each process. This
3665  // requires iterating over nonlocals_, so while we do this,
3666  // deduplicate the entries and get a count for each nonlocal
3667  // row on this process.
3668  // 2. Construct a new row Map corresponding to those rows. This
3669  // Map is likely overlapping. We know that the Map is not
3670  // empty on all processes, because the above all-reduce and
3671  // return exclude that case.
3672 
3673  RCP<const map_type> nonlocalRowMap;
3674  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3675  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3676  {
3677  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3678  size_type curPos = 0;
3679  for (auto mapIter = this->nonlocals_.begin ();
3680  mapIter != this->nonlocals_.end ();
3681  ++mapIter, ++curPos) {
3682  myNonlocalGblRows[curPos] = mapIter->first;
3683  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
// Sort, then drop adjacent duplicates, so that the size recorded
// below is the exact number of distinct column indices in this row.
3684  std::sort (gblCols.begin (), gblCols.end ());
3685  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3686  gblCols.erase (vecLast, gblCols.end ());
3687  numEntPerNonlocalRow[curPos] = gblCols.size ();
3688  }
3689 
3690  // Currently, Map requires that its indexBase be the global min
3691  // of all its global indices. Map won't compute this for us, so
3692  // we must do it. If our process has no nonlocal rows, set the
3693  // "min" to the max possible GO value. This ensures that if
3694  // some process has at least one nonlocal row, then it will pick
3695  // that up as the min. We know that at least one process has a
3696  // nonlocal row, since the all-reduce and return at the top of
3697  // this method excluded that case.
3698  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3699  {
3700  auto iter = std::min_element (myNonlocalGblRows.begin (),
3701  myNonlocalGblRows.end ());
3702  if (iter != myNonlocalGblRows.end ()) {
3703  myMinNonlocalGblRow = *iter;
3704  }
3705  }
3706  GO gblMinNonlocalGblRow = 0;
3707  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3708  outArg (gblMinNonlocalGblRow));
3709  const GO indexBase = gblMinNonlocalGblRow;
3710  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3711  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3712  }
3713 
3714  // 3. Use the column indices for each nonlocal row, as stored in
3715  // nonlocals_, to construct a CrsGraph corresponding to
3716  // nonlocal rows. We may use StaticProfile, since we have
3717  // exact counts of the number of entries in each nonlocal row.
3718 
3719  RCP<crs_graph_type> nonlocalGraph =
3720  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow (),
3721  StaticProfile));
3722  {
// This loop visits nonlocals_ in the same order as the loop above,
// so curPos indexes the matching entry of numEntPerNonlocalRow.
3723  size_type curPos = 0;
3724  for (auto mapIter = this->nonlocals_.begin ();
3725  mapIter != this->nonlocals_.end ();
3726  ++mapIter, ++curPos) {
3727  const GO gblRow = mapIter->first;
3728  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3729  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3730  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3731  }
3732  }
3733  // There's no need to fill-complete the nonlocals graph.
3734  // We just use it as a temporary container for the Export.
3735 
3736  // 4. If the original row Map is one to one, then we can Export
3737  // directly from nonlocalGraph into this. Otherwise, we have
3738  // to create a temporary graph with a one-to-one row Map,
3739  // Export into that, then Import from the temporary graph into
3740  // *this.
3741 
3742  auto origRowMap = this->getRowMap ();
3743  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3744 
3745  if (origRowMapIsOneToOne) {
3746  export_type exportToOrig (nonlocalRowMap, origRowMap);
3747  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3748  // We're done at this point!
3749  }
3750  else {
3751  // If you ask a Map whether it is one to one, it does some
3752  // communication and stashes intermediate results for later use
3753  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3754  // much more than the original cost of calling isOneToOne.
3755  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3756  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3757 
3758  // Create a temporary graph with the one-to-one row Map.
3759  //
3760  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3761  // row, to avoid reallocation during the Export operation.
3762  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3763  // Export from graph of nonlocals into the temp one-to-one graph.
3764  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3765 
3766  // We don't need the graph of nonlocals anymore, so get rid of
3767  // it, to keep the memory high-water mark down.
3768  nonlocalGraph = Teuchos::null;
3769 
3770  // Import from the one-to-one graph to the original graph.
3771  import_type importToOrig (oneToOneRowMap, origRowMap);
3772  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3773  }
3774 
3775  // It's safe now to clear out nonlocals_, since we've already
3776  // committed side effects to *this. The standard idiom for
3777  // clearing a Container like std::map, is to swap it with an empty
3778  // Container and let the swapped Container fall out of scope.
3779  decltype (this->nonlocals_) newNonlocals;
3780  std::swap (this->nonlocals_, newNonlocals);
3781 
3782  checkInternalState ();
3783  }
3784 
3785 
3786  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3787  void
3789  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3790  {
3791  clearGlobalConstants();
3792  if (params != Teuchos::null) this->setParameterList (params);
3793  lowerTriangular_ = false;
3794  upperTriangular_ = false;
3795  // either still sorted/merged or initially sorted/merged
3796  indicesAreSorted_ = true;
3797  noRedundancies_ = true;
3798  fillComplete_ = false;
3799  }
3800 
3801 
3802  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3803  void
3805  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3806  {
3807  // If the graph already has domain and range Maps, don't clobber
3808  // them. If it doesn't, use the current row Map for both the
3809  // domain and range Maps.
3810  //
3811  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3812  // column Map, and column indices are inserted which are not in
3813  // the row Map on any process, this will cause troubles. However,
3814  // that is not a common case for most applications that we
3815  // encounter, and checking for it might require more
3816  // communication.
3817  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3818  if (domMap.is_null ()) {
3819  domMap = this->getRowMap ();
3820  }
3821  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3822  if (ranMap.is_null ()) {
3823  ranMap = this->getRowMap ();
3824  }
3825  this->fillComplete (domMap, ranMap, params);
3826  }
3827 
3828 
3829  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3830  void
3832  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3833  const Teuchos::RCP<const map_type>& rangeMap,
3834  const Teuchos::RCP<Teuchos::ParameterList>& params)
3835  {
3836  const char tfecfFuncName[] = "fillComplete: ";
3837  const bool debug = ::Tpetra::Details::Behavior::debug ();
3838 
3839  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3840  (! isFillActive () || isFillComplete (), std::runtime_error,
3841  "Graph fill state must be active (isFillActive() "
3842  "must be true) before calling fillComplete().");
3843 
3844  const int numProcs = getComm ()->getSize ();
3845 
3846  //
3847  // Read and set parameters
3848  //
3849 
3850  // Does the caller want to sort remote GIDs (within those owned by
3851  // the same process) in makeColMap()?
3852  if (! params.is_null ()) {
3853  if (params->isParameter ("sort column map ghost gids")) {
3854  sortGhostsAssociatedWithEachProcessor_ =
3855  params->get<bool> ("sort column map ghost gids",
3856  sortGhostsAssociatedWithEachProcessor_);
3857  }
3858  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3859  sortGhostsAssociatedWithEachProcessor_ =
3860  params->get<bool> ("Sort column Map ghost GIDs",
3861  sortGhostsAssociatedWithEachProcessor_);
3862  }
3863  }
3864 
3865  // If true, the caller promises that no process did nonlocal
3866  // changes since the last call to fillComplete.
3867  bool assertNoNonlocalInserts = false;
3868  if (! params.is_null ()) {
3869  assertNoNonlocalInserts =
3870  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3871  }
3872 
3873  //
3874  // Allocate indices, if they haven't already been allocated
3875  //
3876  if (! indicesAreAllocated ()) {
3877  if (hasColMap ()) {
3878  // We have a column Map, so use local indices.
3879  allocateIndices (LocalIndices);
3880  } else {
3881  // We don't have a column Map, so use global indices.
3882  allocateIndices (GlobalIndices);
3883  }
3884  }
3885 
3886  //
3887  // Do global assembly, if requested and if the communicator
3888  // contains more than one process.
3889  //
3890  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3891  if (mayNeedGlobalAssemble) {
3892  // This first checks if we need to do global assembly.
3893  // The check costs a single all-reduce.
3894  globalAssemble ();
3895  }
3896  else {
3897  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3898  (numProcs > 1 && this->nonlocals_.size() > 0, std::runtime_error,
3899  "The graph's communicator contains only one process, "
3900  "but there are nonlocal entries. "
3901  "This probably means that invalid entries were added to the graph.");
3902  }
3903 
3904  // Set domain and range Map. This may clear the Import / Export
3905  // objects if the new Maps differ from any old ones.
3906  setDomainRangeMaps (domainMap, rangeMap);
3907 
3908  // If the graph does not already have a column Map (either from
3909  // the user constructor calling the version of the constructor
3910  // that takes a column Map, or from a previous fillComplete call),
3911  // then create it.
3912  Teuchos::Array<int> remotePIDs (0);
3913  const bool mustBuildColMap = ! this->hasColMap ();
3914  if (mustBuildColMap) {
3915  this->makeColMap (remotePIDs); // resized on output
3916  }
3917 
3918  // Make indices local, if they aren't already.
3919  // The method doesn't do any work if the indices are already local.
3920  const std::pair<size_t, std::string> makeIndicesLocalResult =
3921  this->makeIndicesLocal ();
3922  if (debug) { // In debug mode, print error output on all processes
3923  using ::Tpetra::Details::gathervPrint;
3924  using Teuchos::RCP;
3925  using Teuchos::REDUCE_MIN;
3926  using Teuchos::reduceAll;
3927  using Teuchos::outArg;
3928 
3929  RCP<const map_type> map = this->getMap ();
3930  RCP<const Teuchos::Comm<int> > comm;
3931  if (! map.is_null ()) {
3932  comm = map->getComm ();
3933  }
3934  if (comm.is_null ()) {
3935  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3936  (makeIndicesLocalResult.first != 0, std::runtime_error,
3937  makeIndicesLocalResult.second);
3938  }
3939  else {
3940  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3941  int gblSuccess = 0; // output argument
3942  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3943  if (gblSuccess != 1) {
3944  std::ostringstream os;
3945  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3946  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3947  (true, std::runtime_error, os.str ());
3948  }
3949  }
3950  }
3951  else {
3952  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3953  // the error state to makeImportExport or
3954  // computeGlobalConstants, which may do all-reduces and thus may
3955  // have the opportunity to communicate that error state.
3956  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3957  (makeIndicesLocalResult.first != 0, std::runtime_error,
3958  makeIndicesLocalResult.second);
3959  }
3960 
3961  // If this process has no indices, then CrsGraph considers it
3962  // already trivially sorted and merged. Thus, this method need
3963  // not be called on all processes in the row Map's communicator.
3964  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3965 
3966  // Make Import and Export objects, if they haven't been made
3967  // already. If we made a column Map above, reuse information from
3968  // that process to avoid communiation in the Import setup.
3969  this->makeImportExport (remotePIDs, mustBuildColMap);
3970 
3971  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3972  this->fillLocalGraph (params);
3973 
3974  const bool callComputeGlobalConstants = params.get () == nullptr ||
3975  params->get ("compute global constants", true);
3976  const bool computeLocalTriangularConstants = params.get () == nullptr ||
3977  params->get ("compute local triangular constants", true);
3978  if (callComputeGlobalConstants) {
3979  this->computeGlobalConstants (computeLocalTriangularConstants);
3980  }
3981  else {
3982  this->computeLocalConstants (computeLocalTriangularConstants);
3983  }
3984  this->fillComplete_ = true;
3985  this->checkInternalState ();
3986  }
3987 
3988 
// Fill-complete a StaticProfile graph that already has a column Map,
// skipping the allocateIndices, globalAssemble, makeColMap,
// makeIndicesLocal, and sortAndMergeAllIndices steps that
// fillComplete does.  Instead, this method simply marks the indices
// as allocated, local, sorted, and merged.
//
// domainMap, rangeMap: must both be nonnull.
// importer, exporter: optional.  If nonnull, each is checked against
//   the graph's Maps (importer: domain Map -> column Map; exporter:
//   row Map -> range Map) and then adopted.
// params: optional.  Read here for "compute global constants" and
//   "compute local triangular constants" (both default to true), and
//   for "Timer Label" when HAVE_TPETRA_MMM_TIMINGS is defined.  Also
//   passed to fillLocalGraph.
//
// Throws std::runtime_error if a precondition below fails, and
// std::invalid_argument if a given importer or exporter does not
// match the graph's Maps.
3989  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3990  void
3992  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3993  const Teuchos::RCP<const map_type>& rangeMap,
3994  const Teuchos::RCP<const import_type>& importer,
3995  const Teuchos::RCP<const export_type>& exporter,
3996  const Teuchos::RCP<Teuchos::ParameterList>& params)
3997  {
3998  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3999 #ifdef HAVE_TPETRA_MMM_TIMINGS
4000  std::string label;
4001  if(!params.is_null())
4002  label = params->get("Timer Label",label);
4003  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4004  using Teuchos::TimeMonitor;
// MM holds the timer for the current phase.  Each phase below first
// resets MM to null and then assigns a new TimeMonitor to it.
4005  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
4006 #endif
4007 
4008 
4009  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4010  domainMap.is_null () || rangeMap.is_null (),
4011  std::runtime_error, "The input domain Map and range Map must be nonnull.");
4012  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4013  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
4014  "method unless the graph is StaticProfile.");
// This also rejects a graph that is already fill complete, although
// the message only mentions the column Map.
4015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4016  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
4017  "call this method unless the graph has a column Map.");
4018  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4019  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
4020  std::runtime_error, "The calling process has getNodeNumRows() = "
4021  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
4022  "been set.");
4023  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4024  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
4025  std::runtime_error, "The row offsets array has length " <<
4026  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
4027  (getNodeNumRows () + 1) << ".");
4028 
4029  // Note: We don't need to do the following things which are normally done in fillComplete:
4030  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
4031 
4032  // Constants from allocateIndices
4033  //
4034  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
4035  // away once the graph is allocated. expertStaticFillComplete
4036  // either presumes that the graph is allocated, or "allocates" it.
4037  //
4038  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
4039  // version of CrsGraph is to allocate in the constructor, not
4040  // lazily on first insert. That will make both
4041  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
4042  numAllocForAllRows_ = 0;
4043  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
4044  indicesAreAllocated_ = true;
4045 
4046  // Constants from makeIndicesLocal
4047  //
4048  // The graph has a column Map, so its indices had better be local.
4049  indicesAreLocal_ = true;
4050  indicesAreGlobal_ = false;
4051 
4052  // set domain/range map: may clear the import/export objects
4053 #ifdef HAVE_TPETRA_MMM_TIMINGS
4054  MM = Teuchos::null;
4055  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
4056 #endif
4057  setDomainRangeMaps (domainMap, rangeMap);
4058 
4059  // Presume the user sorted and merged the arrays first
4060  indicesAreSorted_ = true;
4061  noRedundancies_ = true;
4062 
4063  // makeImportExport won't create a new importer/exporter if I set one here first.
4064 #ifdef HAVE_TPETRA_MMM_TIMINGS
4065  MM = Teuchos::null;
4066  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
4067 #endif
4068 
// Drop any Import / Export the graph currently holds.  The
// caller-provided ones, if any, are adopted only after they pass the
// Map checks below.
4069  importer_ = Teuchos::null;
4070  exporter_ = Teuchos::null;
4071  if (importer != Teuchos::null) {
4072  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4073  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
4074  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
4075  std::invalid_argument,": importer does not match matrix maps.");
4076  importer_ = importer;
4077 
4078  }
4079 
4080 #ifdef HAVE_TPETRA_MMM_TIMINGS
4081  MM = Teuchos::null;
4082  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
4083 #endif
4084 
4085  if (exporter != Teuchos::null) {
4086  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4087  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
4088  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
4089  std::invalid_argument,": exporter does not match matrix maps.");
4090  exporter_ = exporter;
4091  }
4092 
4093 #ifdef HAVE_TPETRA_MMM_TIMINGS
4094  MM = Teuchos::null;
4095  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
4096 #endif
4097  Teuchos::Array<int> remotePIDs (0); // unused output argument
4098  this->makeImportExport (remotePIDs, false);
4099 
4100  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
4101 #ifdef HAVE_TPETRA_MMM_TIMINGS
4102  MM = Teuchos::null;
4103  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
4104 #endif
4105  this->fillLocalGraph (params);
4106 
// Both options default to true, including when params is null.
4107  const bool callComputeGlobalConstants = params.get () == nullptr ||
4108  params->get ("compute global constants", true);
4109  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4110  params->get ("compute local triangular constants", true);
4111 
4112  if (callComputeGlobalConstants) {
4113 #ifdef HAVE_TPETRA_MMM_TIMINGS
4114  MM = Teuchos::null;
4115  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
4116 #endif // HAVE_TPETRA_MMM_TIMINGS
4117  this->computeGlobalConstants (computeLocalTriangularConstants);
4118  }
4119  else {
4120 #ifdef HAVE_TPETRA_MMM_TIMINGS
4121  MM = Teuchos::null;
4122  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
4123 #endif // HAVE_TPETRA_MMM_TIMINGS
4124  this->computeLocalConstants (computeLocalTriangularConstants);
4125  }
4126 
4127  fillComplete_ = true;
4128 
4129 #ifdef HAVE_TPETRA_MMM_TIMINGS
4130  MM = Teuchos::null;
4131  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
4132 #endif
4133  checkInternalState ();
4134  }
4135 
4136 
4137  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4138  void
4140  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
4141  {
4143  typedef decltype (k_numRowEntries_) row_entries_type;
4144  typedef typename local_graph_type::row_map_type row_map_type;
4145  typedef typename row_map_type::non_const_type non_const_row_map_type;
4146  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
4147  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
4148  "expertStaticFillComplete): ";
4149  const bool debug = ::Tpetra::Details::Behavior::debug ();
4150  const size_t lclNumRows = this->getNodeNumRows ();
4151 
4152  // This method's goal is to fill in the two arrays (compressed
4153  // sparse row format) that define the sparse graph's structure.
4154  //
4155  // Use the nonconst version of row_map_type for ptr_d, because
4156  // the latter is const and we need to modify ptr_d here.
4157  non_const_row_map_type ptr_d;
4158  row_map_type ptr_d_const;
4159  lclinds_1d_type ind_d;
4160 
4161  bool requestOptimizedStorage = true;
4162  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
4163  requestOptimizedStorage = false;
4164  }
4165  if (this->getProfileType () != StaticProfile) {
4166  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
4167  //
4168  // DynamicProfile means that the graph's column indices are
4169  // currently stored in a 2-D "unpacked" format, in the
4170  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
4171  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
4172  // storage (ind_d).
4173  if (debug) {
4174  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4175  (static_cast<size_t> (this->k_numRowEntries_.extent (0)) !=
4176  lclNumRows, std::logic_error, "(DynamicProfile branch) "
4177  "k_numRowEntries_.extent(0) = " << k_numRowEntries_.extent (0)
4178  << " != getNodeNumRows() = " << lclNumRows << "");
4179  }
4180 
4181  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4182  // array of valid entry counts per row (k_numRowEntries_). The
4183  // pack method can handle its counts input being a host View.
4184  //
4185  // Total number of entries in the matrix on the calling
4186  // process. We will compute this in the loop below. It's
4187  // cheap to compute and useful as a sanity check.
4188  size_t lclTotalNumEntries = 0;
4189  {
4190  // Allocate the packed row offsets array.
4191  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
4192  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4193  // This function can handle that numRowEnt_h lives on host.
4194  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4195  ptr_d_const = ptr_d;
4196  }
4197 
4198  if (debug) {
4199  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4200  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4201  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
4202  "ptr_d.extent(0) = " << ptr_d.extent (0) << " != "
4203  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
4204  {
4205  const auto valToCheck =
4206  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4208  (valToCheck != lclTotalNumEntries, std::logic_error,
4209  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
4210  << lclNumRows << ") = " << valToCheck << " != total number of "
4211  "entries on the calling process = " << lclTotalNumEntries << ".");
4212  }
4213  }
4214 
4215  // Allocate the array of packed column indices.
4216  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4217  // Pack the column indices. We have to do this sequentially on
4218  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
4219  // doesn't work in parallel kernels (its iterators aren't even
4220  // thread safe in debug mode).
4221  {
4222  auto ptr_h = Kokkos::create_mirror_view (ptr_d);
4223  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
4224  auto ind_h = Kokkos::create_mirror_view (ind_d); // will fill on host
4225 
4226  // k_numRowEntries_ is a host View already, so we can use it here.
4227  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4228  for (size_t row = 0; row < lclNumRows; ++row) {
4229  const size_t numEnt = numRowEnt_h(row);
4230  std::copy (lclInds2D_[row].begin (),
4231  lclInds2D_[row].begin () + numEnt,
4232  ind_h.data () + ptr_h(row));
4233  }
4234  Kokkos::deep_copy (ind_d, ind_h);
4235  }
4236 
4237  if (debug) {
4238  // Sanity check of packed row offsets.
4239  if (ptr_d.extent (0) != 0) {
4240  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4241  const size_t valToCheck =
4242  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4243  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4244  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4245  std::logic_error, "(DynamicProfile branch) After packing column "
4246  "indices, ptr_d(" << (numOffsets-1) << ") = " << valToCheck
4247  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4248  }
4249  }
4250  }
4251  else if (getProfileType () == StaticProfile) {
4252  // StaticProfile means that the graph's column indices are
4253  // currently stored in a 1-D format, with row offsets in
4254  // k_rowPtrs_ and local column indices in k_lclInds1D_.
4255 
4256  if (debug) {
4257  // StaticProfile also means that the graph's array of row
4258  // offsets must already be allocated.
4259  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4260  (k_rowPtrs_.extent (0) == 0, std::logic_error,
4261  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
4262  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4263  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
4264  "(StaticProfile branch) k_rowPtrs_.extent(0) = "
4265  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
4266  << (lclNumRows + 1) << ".");
4267  {
4268  const size_t numOffsets = k_rowPtrs_.extent (0);
4269  const auto valToCheck =
4270  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4271  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4272  (numOffsets != 0 &&
4273  k_lclInds1D_.extent (0) != valToCheck,
4274  std::logic_error, "(StaticProfile branch) numOffsets = " <<
4275  numOffsets << " != 0 and k_lclInds1D_.extent(0) = " <<
4276  k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets <<
4277  ") = " << valToCheck << ".");
4278  }
4279  }
4280 
4281  size_t allocSize = 0;
4282  try {
4283  allocSize = this->getNodeAllocationSize ();
4284  }
4285  catch (std::logic_error& e) {
4286  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4287  (true, std::logic_error, "getNodeAllocationSize threw "
4288  "std::logic_error: " << e.what ());
4289  }
4290  catch (std::runtime_error& e) {
4291  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4292  (true, std::runtime_error, "getNodeAllocationSize threw "
4293  "std::runtime_error: " << e.what ());
4294  }
4295  catch (std::exception& e) {
4296  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4297  (true, std::runtime_error, "getNodeAllocationSize threw "
4298  "std::exception: " << e.what ());
4299  }
4300  catch (...) {
4301  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4302  (true, std::runtime_error, "getNodeAllocationSize threw "
4303  "an exception not a subclass of std::exception.");
4304  }
4305 
4306  if (this->getNodeNumEntries () != allocSize) {
4307  // The graph's current 1-D storage is "unpacked." This means
4308  // the row offsets may differ from what the final row offsets
4309  // should be. This could happen, for example, if the user
4310  // specified StaticProfile in the constructor and set an upper
4311  // bound on the number of entries in each row, but didn't fill
4312  // all those entries.
4313 
4314  if (debug) {
4315  if (k_rowPtrs_.extent (0) != 0) {
4316  const size_t numOffsets =
4317  static_cast<size_t> (k_rowPtrs_.extent (0));
4318  const auto valToCheck =
4319  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4321  (valToCheck != static_cast<size_t> (k_lclInds1D_.extent (0)),
4322  std::logic_error, "(StaticProfile unpacked branch) Before "
4323  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
4324  << valToCheck << " != k_lclInds1D_.extent(0) = "
4325  << k_lclInds1D_.extent (0) << ".");
4326  }
4327  }
4328 
4329  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4330  // array of valid entry counts per row (k_numRowEntries_).
4331 
4332  // Total number of entries in the matrix on the calling
4333  // process. We will compute this in the loop below. It's
4334  // cheap to compute and useful as a sanity check.
4335  size_t lclTotalNumEntries = 0;
4336  {
4337  // Allocate the packed row offsets array.
4338  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4339  ptr_d_const = ptr_d;
4340 
4341  // It's ok that k_numRowEntries_ is a host View; the
4342  // function can handle this.
4343  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4344  if (debug) {
4345  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4346  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
4347  std::logic_error, "(StaticProfile unpacked branch) "
4348  "numRowEnt_h.extent(0) = " << numRowEnt_h.extent (0)
4349  << " != getNodeNumRows() = " << lclNumRows << "");
4350  }
4351 
4352  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4353 
4354  if (debug) {
4355  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4356  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4357  std::logic_error, "(StaticProfile unpacked branch) After "
4358  "allocating ptr_d, ptr_d.extent(0) = " << ptr_d.extent (0)
4359  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4360  {
4361  const auto valToCheck =
4362  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4364  (valToCheck != lclTotalNumEntries, std::logic_error,
4365  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4366  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
4367  << ") = " << valToCheck << " != total number of entries on "
4368  "the calling process = " << lclTotalNumEntries << ".");
4369  }
4370  }
4371  }
4372 
4373  // Allocate the array of packed column indices.
4374  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4375 
4376  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4377  // them, using the packed row offsets array ptr_d that we
4378  // created above.
4379  //
4380  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4381  // CrsMatrix?), we need to keep around the unpacked row
4382  // offsets and column indices.
4383 
4384  // Pack the column indices from unpacked k_lclInds1D_ into
4385  // packed ind_d. We will replace k_lclInds1D_ below.
4386  typedef pack_functor<
4387  typename local_graph_type::entries_type::non_const_type,
4388  row_map_type> inds_packer_type;
4389  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
4390  {
4391  typedef typename decltype (ind_d)::execution_space exec_space;
4392  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
4393  Kokkos::parallel_for (range_type (0, lclNumRows), f);
4394  }
4395 
4396  if (debug) {
4397  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4398  (ptr_d.extent (0) == 0, std::logic_error, "(StaticProfile "
4399  "\"Optimize Storage\"=true branch) After packing, "
4400  "ptr_d.extent(0) = 0. This probably means k_rowPtrs_ was "
4401  "never allocated.");
4402  if (ptr_d.extent (0) != 0) {
4403  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4404  const auto valToCheck =
4405  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4406  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4407  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4408  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
4409  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = "
4410  << valToCheck << " != ind_d.extent(0) = "
4411  << ind_d.extent (0) << ".");
4412  }
4413  }
4414  }
4415  else { // We don't have to pack, so just set the pointers.
4416  ptr_d_const = k_rowPtrs_;
4417  ind_d = k_lclInds1D_;
4418 
4419  if (debug) {
4420  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4421  (ptr_d_const.extent (0) == 0, std::logic_error, "(StaticProfile "
4422  "\"Optimize Storage\"=false branch) ptr_d_const.extent(0) = 0. "
4423  "This probably means that k_rowPtrs_ was never allocated.");
4424  if (ptr_d_const.extent (0) != 0) {
4425  const size_t numOffsets =
4426  static_cast<size_t> (ptr_d_const.extent (0));
4427  const size_t valToCheck =
4428  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4430  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4431  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
4432  "branch) ptr_d_const(" << (numOffsets-1) << ") = " << valToCheck
4433  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4434  }
4435  }
4436  }
4437  }
4438 
4439  if (debug) {
4440  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4441  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
4442  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
4443  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4444  << ".");
4445  if (ptr_d_const.extent (0) != 0) {
4446  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
4447  const auto valToCheck =
4448  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4449  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4450  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4451  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
4452  << ") = " << valToCheck << " != ind_d.extent(0) = "
4453  << ind_d.extent (0) << ".");
4454  }
4455  }
4456 
4457  if (requestOptimizedStorage) {
4458  // With optimized storage, we don't need to store the 2-D column
4459  // indices array-of-arrays, or the array of row entry counts.
4460 
4461  // Free graph data structures that are only needed for 2-D or
4462  // unpacked 1-D storage.
4463  lclInds2D_ = Teuchos::null;
4464  k_numRowEntries_ = row_entries_type ();
4465 
4466  // Keep the new 1-D packed allocations.
4467  k_rowPtrs_ = ptr_d_const;
4468  k_lclInds1D_ = ind_d;
4469 
4470  // The graph is definitely StaticProfile now, whether or not it
4471  // was before.
4472  pftype_ = StaticProfile;
4473  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
4474  }
4475 
4476  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4477 
4478  // Build the local graph.
4479  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4480  }
4481 
4482  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4483  void
4485  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4486  {
4487  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4488  //
4489  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4490  // has no entries, because in that case, currently it is neither
4491  // locally nor globally indexed. This will change once we get rid
4492  // of lazy allocation (so that the constructor allocates indices
4493  // and therefore commits to local vs. global).
4494  const char tfecfFuncName[] = "replaceColMap: ";
4495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4496  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4497  "Requires matching maps and non-static graph.");
4498  colMap_ = newColMap;
4499  }
4500 
4501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4502  void
4504  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4505  const Teuchos::RCP<const import_type>& newImport,
4506  const bool sortIndicesInEachRow)
4507  {
4508  using Teuchos::REDUCE_MIN;
4509  using Teuchos::reduceAll;
4510  using Teuchos::RCP;
4511  typedef GlobalOrdinal GO;
4512  typedef LocalOrdinal LO;
4513  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4514  const char tfecfFuncName[] = "reindexColumns: ";
4515 
4516  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4517  isFillComplete (), std::runtime_error, "The graph is fill complete "
4518  "(isFillComplete() returns true). You must call resumeFill() before "
4519  "you may call this method.");
4520 
4521  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4522  // doesn't claim to do the work of an Import or Export. This
4523  // means that for all processes, the calling process MUST own all
4524  // column indices, in both the old column Map (if it exists) and
4525  // the new column Map. We check this via an all-reduce.
4526  //
4527  // Some processes may be globally indexed, others may be locally
4528  // indexed, and others (that have no graph entries) may be
4529  // neither. This method will NOT change the graph's current
4530  // state. If it's locally indexed, it will stay that way, and
4531  // vice versa. It would easy to add an option to convert indices
4532  // from global to local, so as to save a global-to-local
4533  // conversion pass. However, we don't do this here. The intended
4534  // typical use case is that the graph already has a column Map and
4535  // is locally indexed, and this is the case for which we optimize.
4536 
4537  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4538 
4539  // Attempt to convert indices to the new column Map's version of
4540  // local. This will fail if on the calling process, the graph has
4541  // indices that are not on that process in the new column Map.
4542  // After the local conversion attempt, we will do an all-reduce to
4543  // see if any processes failed.
4544 
4545  // If this is false, then either the graph contains a column index
4546  // which is invalid in the CURRENT column Map, or the graph is
4547  // locally indexed but currently has no column Map. In either
4548  // case, there is no way to convert the current local indices into
4549  // global indices, so that we can convert them into the new column
4550  // Map's local indices. It's possible for this to be true on some
4551  // processes but not others, due to replaceColMap.
4552  bool allCurColIndsValid = true;
4553  // On the calling process, are all valid current column indices
4554  // also in the new column Map on the calling process? In other
4555  // words, does local reindexing suffice, or should the user have
4556  // done an Import or Export instead?
4557  bool localSuffices = true;
4558 
4559  // Final arrays for the local indices. We will allocate exactly
4560  // one of these ONLY if the graph is locally indexed on the
4561  // calling process, and ONLY if the graph has one or more entries
4562  // (is not empty) on the calling process. In that case, we
4563  // allocate the first (1-D storage) if the graph has a static
4564  // profile, else we allocate the second (2-D storage).
4565  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4566  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4567 
4568  // If indices aren't allocated, that means the calling process
4569  // owns no entries in the graph. Thus, there is nothing to
4570  // convert, and it trivially succeeds locally.
4571  if (indicesAreAllocated ()) {
4572  if (isLocallyIndexed ()) {
4573  if (hasColMap ()) { // locally indexed, and currently has a column Map
4574  const map_type& oldColMap = * (getColMap ());
4575  if (pftype_ == StaticProfile) {
4576  // Allocate storage for the new local indices.
4577  const size_t allocSize = this->getNodeAllocationSize ();
4578  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4579  // Attempt to convert the new indices locally.
4580  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4581  const RowInfo rowInfo = this->getRowInfo (lclRow);
4582  const size_t beg = rowInfo.offset1D;
4583  const size_t end = beg + rowInfo.numEntries;
4584  for (size_t k = beg; k < end; ++k) {
4585  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4586  // use a DualView instead.
4587  const LO oldLclCol = k_lclInds1D_(k);
4588  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4589  allCurColIndsValid = false;
4590  break; // Stop at the first invalid index
4591  }
4592  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4593 
4594  // The above conversion MUST succeed. Otherwise, the
4595  // current local index is invalid, which means that
4596  // the graph was constructed incorrectly.
4597  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4598  allCurColIndsValid = false;
4599  break; // Stop at the first invalid index
4600  }
4601  else {
4602  const LO newLclCol = newColMap->getLocalElement (gblCol);
4603  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4604  localSuffices = false;
4605  break; // Stop at the first invalid index
4606  }
4607  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4608  // use a DualView instead.
4609  newLclInds1D(k) = newLclCol;
4610  }
4611  } // for each entry in the current row
4612  } // for each locally owned row
4613  }
4614  else { // pftype_ == DynamicProfile
4615  // Allocate storage for the new local indices. We only
4616  // allocate the outer array here; we will allocate the
4617  // inner arrays below.
4618  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4619 
4620  // Attempt to convert the new indices locally.
4621  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4622  const RowInfo rowInfo = this->getRowInfo (lclRow);
4623  newLclInds2D.resize (rowInfo.allocSize);
4624 
4625  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4626  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4627 
4628  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4629  const LO oldLclCol = oldLclRowView[k];
4630  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4631  allCurColIndsValid = false;
4632  break; // Stop at the first invalid index
4633  }
4634  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4635 
4636  // The above conversion MUST succeed. Otherwise, the
4637  // local index is invalid and the graph is wrong.
4638  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4639  allCurColIndsValid = false;
4640  break; // Stop at the first invalid index
4641  }
4642  else {
4643  const LO newLclCol = newColMap->getLocalElement (gblCol);
4644  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4645  localSuffices = false;
4646  break; // Stop at the first invalid index.
4647  }
4648  newLclRowView[k] = newLclCol;
4649  }
4650  } // for each entry in the current row
4651  } // for each locally owned row
4652  } // pftype_
4653  }
4654  else { // locally indexed, but no column Map
4655  // This case is only possible if replaceColMap() was called
4656  // with a null argument on the calling process. It's
4657  // possible, but it means that this method can't possibly
4658  // succeed, since we have no way of knowing how to convert
4659  // the current local indices to global indices.
4660  allCurColIndsValid = false;
4661  }
4662  }
4663  else { // globally indexed
4664  // If the graph is globally indexed, we don't need to save
4665  // local indices, but we _do_ need to know whether the current
4666  // global indices are valid in the new column Map. We may
4667  // need to do a getRemoteIndexList call to find this out.
4668  //
4669  // In this case, it doesn't matter whether the graph currently
4670  // has a column Map. We don't need the old column Map to
4671  // convert from global indices to the _new_ column Map's local
4672  // indices. Furthermore, we can use the same code, whether
4673  // the graph is static or dynamic profile.
4674 
4675  // Test whether the current global indices are in the new
4676  // column Map on the calling process.
4677  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4678  const RowInfo rowInfo = this->getRowInfo (lclRow);
4679  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4680  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4681  const GO gblCol = oldGblRowView[k];
4682  if (! newColMap->isNodeGlobalElement (gblCol)) {
4683  localSuffices = false;
4684  break; // Stop at the first invalid index
4685  }
4686  } // for each entry in the current row
4687  } // for each locally owned row
4688  } // locally or globally indexed
4689  } // whether indices are allocated
4690 
4691  // Do an all-reduce to check both possible error conditions.
4692  int lclSuccess[2];
4693  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4694  lclSuccess[1] = localSuffices ? 1 : 0;
4695  int gblSuccess[2];
4696  gblSuccess[0] = 0;
4697  gblSuccess[1] = 0;
4698  RCP<const Teuchos::Comm<int> > comm =
4699  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4700  if (! comm.is_null ()) {
4701  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4702  }
4703 
4704  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4705  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4706  " The most likely reason is that the graph is locally indexed, but the "
4707  "column Map is missing (null) on some processes, due to a previous call "
4708  "to replaceColMap().");
4709 
4710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4711  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4712  "contains column indices that are in the old column Map, but not in the "
4713  "new column Map (on that process). This method does NOT redistribute "
4714  "data; it does not claim to do the work of an Import or Export operation."
4715  " This means that for all processess, the calling process MUST own all "
4716  "column indices, in both the old column Map and the new column Map. In "
4717  "this case, you will need to do an Import or Export operation to "
4718  "redistribute data.");
4719 
4720  // Commit the results.
4721  if (isLocallyIndexed ()) {
4722  if (pftype_ == StaticProfile) {
4723  k_lclInds1D_ = newLclInds1D;
4724  } else { // dynamic profile
4725  lclInds2D_ = newLclInds2D;
4726  }
4727  // We've reindexed, so we don't know if the indices are sorted.
4728  //
4729  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4730  // since we're already going through all the indices above. We
4731  // could also sort each row in place; that way, we would only
4732  // have to make one pass over the rows.
4733  indicesAreSorted_ = false;
4734  if (sortIndicesInEachRow) {
4735  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4736  // order to call this method.
4737  //
4738  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4739  // guarantee. It would be better to sort the new index arrays
4740  // before committing them.
4741  const bool sorted = false; // need to resort
4742  const bool merged = true; // no need to merge, since no dups
4743  this->sortAndMergeAllIndices (sorted, merged);
4744  }
4745  }
4746  colMap_ = newColMap;
4747 
4748  if (newImport.is_null ()) {
4749  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4750  // check whether the input Import is null on any process.
4751  //
4752  // If the domain Map hasn't been set yet, we can't compute a new
4753  // Import object. Leave it what it is; it should be null, but
4754  // it doesn't matter. If the domain Map _has_ been set, then
4755  // compute a new Import object if necessary.
4756  if (! domainMap_.is_null ()) {
4757  if (! domainMap_->isSameAs (* newColMap)) {
4758  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4759  } else {
4760  importer_ = Teuchos::null; // don't need an Import
4761  }
4762  }
4763  } else {
4764  // The caller gave us an Import object. Assume that it's valid.
4765  importer_ = newImport;
4766  }
4767  }
4768 
4769 
4770  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4771  void
4773  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4774  const Teuchos::RCP<const import_type>& newImporter)
4775  {
4776  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4777  TEUCHOS_TEST_FOR_EXCEPTION(
4778  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4779  "this method unless the graph already has a column Map.");
4780  TEUCHOS_TEST_FOR_EXCEPTION(
4781  newDomainMap.is_null (), std::invalid_argument,
4782  prefix << "The new domain Map must be nonnull.");
4783 
4784  const bool debug = ::Tpetra::Details::Behavior::debug ();
4785  if (debug) {
4786  if (newImporter.is_null ()) {
4787  // It's not a good idea to put expensive operations in a macro
4788  // clause, even if they are side effect - free, because macros
4789  // don't promise that they won't evaluate their arguments more
4790  // than once. It's polite for them to do so, but not required.
4791  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4792  TEUCHOS_TEST_FOR_EXCEPTION
4793  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4794  "then the new domain Map must be the same as the current column Map.");
4795  }
4796  else {
4797  const bool colSameAsTgt =
4798  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4799  const bool newDomSameAsSrc =
4800  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4801  TEUCHOS_TEST_FOR_EXCEPTION
4802  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4803  "new Import is nonnull, then the current column Map must be the same "
4804  "as the new Import's target Map, and the new domain Map must be the "
4805  "same as the new Import's source Map.");
4806  }
4807  }
4808 
4809  domainMap_ = newDomainMap;
4810  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4811  }
4812 
4813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4817  {
4818  return lclGraph_;
4819  }
4820 
4821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4822  void
4824  computeGlobalConstants (const bool computeLocalTriangularConstants)
4825  {
4826  using ::Tpetra::Details::ProfilingRegion;
4827  using Teuchos::ArrayView;
4828  using Teuchos::outArg;
4829  using Teuchos::reduceAll;
4830  typedef global_size_t GST;
4831 
4832  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4833 
4834  this->computeLocalConstants (computeLocalTriangularConstants);
4835 
4836  // Compute global constants from local constants. Processes that
4837  // already have local constants still participate in the
4838  // all-reduces, using their previously computed values.
4839  if (! this->haveGlobalConstants_) {
4840  const Teuchos::Comm<int>& comm = * (this->getComm ());
4841  // Promote all the nodeNum* and nodeMaxNum* quantities from
4842  // size_t to global_size_t, when doing the all-reduces for
4843  // globalNum* / globalMaxNum* results.
4844  //
4845  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4846  // this in two all-reduces (one for the sum and the other for
4847  // the max), or use a custom MPI_Op that combines the sum and
4848  // the max. The latter might even be slower than two
4849  // all-reduces on modern network hardware. It would also be a
4850  // good idea to use nonblocking all-reduces (MPI 3), so that we
4851  // don't have to wait around for the first one to finish before
4852  // starting the second one.
4853  GST lcl[2], gbl[2];
4854  lcl[0] = static_cast<GST> (this->getNodeNumEntries ());
4855 
4856  // mfh 03 May 2018: nodeNumDiags_ is invalid if
4857  // computeLocalTriangularConstants is false, but there's no
4858  // practical network latency difference between an all-reduce of
4859  // length 1 and an all-reduce of length 2, so it's not worth
4860  // distinguishing between the two. However, we do want to avoid
4861  // integer overflow, so we'll just set the input local sum to
4862  // zero in that case.
4863  lcl[1] = computeLocalTriangularConstants ?
4864  static_cast<GST> (this->nodeNumDiags_) :
4865  static_cast<GST> (0);
4866 
4867  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 2, lcl, gbl);
4868  this->globalNumEntries_ = gbl[0];
4869 
4870  // mfh 03 May 2018: If not computing local triangular
4871  // properties, users want this to be invalid, not just zero.
4872  // This will help with debugging.
4873  this->globalNumDiags_ = computeLocalTriangularConstants ?
4874  gbl[1] :
4875  Teuchos::OrdinalTraits<GST>::invalid ();
4876 
4877  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4878  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4879  outArg (this->globalMaxNumRowEntries_));
4880  this->haveGlobalConstants_ = true;
4881  }
4882  }
4883 
4884 
// Compute this process's local constants of the graph, unless
// haveLocalConstants_ says that they were already computed.
//
// \param computeLocalTriangularConstants [in] If true, and both the
//   row Map and the column Map exist, call
//   determineLocalTriangularStructure to set lowerTriangular_,
//   upperTriangular_, nodeMaxNumRowEntries_, and nodeNumDiags_.  If
//   true but either Map is missing, set nodeMaxNumRowEntries_ and
//   nodeNumDiags_ to zero.  If false, compute only
//   nodeMaxNumRowEntries_ from the row offsets; nodeNumDiags_ keeps
//   its "invalid" value and both triangular flags stay false.
//
// \post haveLocalConstants_ is true.
//
// NOTE(review): Because of the early return below, a call with
// computeLocalTriangularConstants=true does nothing if an earlier call
// already set haveLocalConstants_, even if that earlier call passed
// false.  Confirm that callers clear haveLocalConstants_ first when
// they need the triangular quantities.
4885  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4886  void
4888  computeLocalConstants (const bool computeLocalTriangularConstants)
4889  {
4891  using ::Tpetra::Details::ProfilingRegion;
4892 
4893  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
// Nothing to do if the local constants were computed before.
4894  if (this->haveLocalConstants_) {
4895  return;
4896  }
4897 
4898  // Reset local properties
4899  this->lowerTriangular_ = false;
4900  this->upperTriangular_ = false;
4901  this->nodeMaxNumRowEntries_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4902  this->nodeNumDiags_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4903 
4904  if (computeLocalTriangularConstants) {
4905  const bool hasRowAndColumnMaps =
4906  this->rowMap_.get () != nullptr && this->colMap_.get () != nullptr;
4907  if (hasRowAndColumnMaps) {
4908  auto lclRowMap = this->rowMap_->getLocalMap ();
4909  auto lclColMap = this->colMap_->getLocalMap ();
4910 
4911  // Make sure that the GPU can see any updates made on host.
4912  // This code only reads the local graph, so we don't need a
4913  // fence afterwards.
4914  execution_space::fence ();
4915 
4916  // mfh 01 May 2018: See GitHub Issue #2658.
4917  constexpr bool ignoreMapsForTriStruct = true;
4918  auto result =
4919  determineLocalTriangularStructure (this->lclGraph_, lclRowMap,
4920  lclColMap, ignoreMapsForTriStruct);
4921  this->lowerTriangular_ = result.couldBeLowerTriangular;
4922  this->upperTriangular_ = result.couldBeUpperTriangular;
4923  this->nodeMaxNumRowEntries_ = result.maxNumRowEnt;
4924  this->nodeNumDiags_ = result.diagCount;
4925  }
4926  else {
// Without both Maps, report zero for both counts instead of leaving
// them at the "invalid" value set above.
4927  this->nodeMaxNumRowEntries_ = 0;
4928  this->nodeNumDiags_ = 0;
4929  }
4930  }
4931  else {
4932  using LO = local_ordinal_type;
4933  // Make sure that the GPU can see any updates made on host.
4934  // This code only reads the local graph, so we don't need a
4935  // fence afterwards.
4936  execution_space::fence ();
4937 
// ptr is the local graph's row offsets array.  If it is nonempty, it
// has one more entry than the number of local rows.
4938  auto ptr = this->lclGraph_.row_map;
4939  const LO lclNumRows = ptr.extent(0) == 0 ?
4940  static_cast<LO> (0) :
4941  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4942 
// Presumably maxDifference returns the largest difference between
// consecutive offsets, i.e., the longest row -- TODO confirm against
// ::Tpetra::Details::maxDifference.
4943  const LO lclMaxNumRowEnt =
4944  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4945  ptr, lclNumRows);
4946  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4947  }
4948  this->haveLocalConstants_ = true;
4949  }
4950 
4951 
4952  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4953  std::pair<size_t, std::string>
4956  {
4957  using ::Tpetra::Details::ProfilingRegion;
4958  using Teuchos::arcp;
4959  using Teuchos::Array;
4960  using std::endl;
4961  typedef LocalOrdinal LO;
4962  typedef GlobalOrdinal GO;
4963  typedef device_type DT;
4964  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4965  typedef decltype (k_numRowEntries_) row_entries_type;
4966  typedef typename row_entries_type::non_const_value_type num_ent_type;
4967  typedef typename local_graph_type::entries_type::non_const_type
4968  lcl_col_inds_type;
4969  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4970  device_type> gbl_col_inds_type;
4971  const char tfecfFuncName[] = "makeIndicesLocal: ";
4972  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4973 
4974  // These are somewhat global properties, so it's safe to have
4975  // exception checks for them, rather than returning an error code.
4976  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4977  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4978  "column Map yet. This method should never be called in that case. "
4979  "Please report this bug to the Tpetra developers.");
4980  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4981  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4982  "that it has a column Map, because hasColMap() returns true. However, "
4983  "the result of getColMap() is null. This should never happen. Please "
4984  "report this bug to the Tpetra developers.");
4985 
4986  // Return value 1: The number of column indices (counting
4987  // duplicates) that could not be converted to local indices,
4988  // because they were not in the column Map on the calling process.
4989  size_t lclNumErrs = 0;
4990  std::ostringstream errStrm; // for return value 2 (error string)
4991 
4992  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4993  const map_type& colMap = * (this->getColMap ());
4994 
4995  if (this->isGloballyIndexed () && lclNumRows != 0) {
4996  // This is a host-accessible View.
4997  typename row_entries_type::const_type h_numRowEnt =
4998  this->k_numRowEntries_;
4999 
5000  // Allocate space for local indices.
5001  if (this->getProfileType () == StaticProfile) {
5002  // If GO and LO are the same size, we can reuse the existing
5003  // array of 1-D index storage to convert column indices from
5004  // GO to LO. Otherwise, we'll just allocate a new buffer.
5005  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
5006  if (LO_GO_same) {
5007  // This prevents a build error (illegal assignment) if
5008  // LO_GO_same is _not_ true. Only the first branch
5009  // (returning k_gblInds1D_) should ever get taken.
5010  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
5012  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
5013  }
5014  else {
5015  if (k_rowPtrs_.extent (0) == 0) {
5016  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
5017  "happen here. Please report this bug to the Tpetra developers."
5018  << endl;
5019  // Need to return early.
5020  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
5021  errStrm.str ());
5022  }
5023  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
5024 
5025  // mfh 17 Dec 2016: We don't need initial zero-fill of
5026  // k_lclInds1D_, because we will fill it below anyway.
5027  // AllowPadding would only help for aligned access (e.g.,
5028  // for vectorization) if we also were to pad each row to the
5029  // same alignment, so we'll skip AllowPadding for now.
5030 
5031  // using Kokkos::AllowPadding;
5032  using Kokkos::view_alloc;
5033  using Kokkos::WithoutInitializing;
5034 
5035  // When giving the label as an argument to
5036  // Kokkos::view_alloc, the label must be a string and not a
5037  // char*, else the code won't compile. This is because
5038  // view_alloc also allows a raw pointer as its first
5039  // argument. See
5040  // https://github.com/kokkos/kokkos/issues/434. This is a
5041  // large allocation typically, so the overhead of creating
5042  // an std::string is minor.
5043  const std::string label ("Tpetra::CrsGraph::lclind");
5044  k_lclInds1D_ =
5045  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
5046  }
5047 
5048  auto lclColMap = colMap.getLocalMap ();
5049  // This is a "device mirror" of the host View h_numRowEnt.
5050  //
5051  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
5052  // Device instance is to use its default constructor. See the
5053  // following Kokkos issue:
5054  //
5055  // https://github.com/kokkos/kokkos/issues/442
5056  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
5057 
5059  lclNumErrs =
5060  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
5061  k_gblInds1D_,
5062  k_rowPtrs_,
5063  lclColMap,
5064  k_numRowEnt);
5065  if (lclNumErrs != 0) {
5066  const int myRank = [this] () {
5067  auto map = this->getMap ();
5068  if (map.is_null ()) {
5069  return 0;
5070  }
5071  else {
5072  auto comm = map->getComm ();
5073  return comm.is_null () ? 0 : comm->getRank ();
5074  }
5075  } ();
5076  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5077  errStrm << "(Process " << myRank << ") When converting column "
5078  "indices from global to local, we encountered " << lclNumErrs
5079  << " ind" << (pluralNumErrs ? "ices" : "ex")
5080  << " that do" << (pluralNumErrs ? "es" : "")
5081  << " not live in the column Map on this process." << endl;
5082  }
5083 
5084  // We've converted column indices from global to local, so we
5085  // can deallocate the global column indices (which we know are
5086  // in 1-D storage, because the graph has static profile).
5087  k_gblInds1D_ = gbl_col_inds_type ();
5088  }
5089  else { // the graph has dynamic profile (2-D index storage)
5090  // Avoid any drama with *this capture, by extracting the
5091  // variables that the thread-parallel loop will need below.
5092  // This is just a shallow copy.
5093  Teuchos::ArrayRCP<Teuchos::Array<LO> > lclInds2D (lclNumRows);
5094  Teuchos::ArrayRCP<Teuchos::Array<GO> > gblInds2D = this->gblInds2D_;
5095 
5096  // We must use a host thread parallelization here, because
5097  // Teuchos::ArrayRCP does not work in CUDA.
5098  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5099  host_execution_space;
5100  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5101  Kokkos::parallel_reduce (
5102  "Tpetra::CrsGraph::makeIndicesLocal (DynamicProfile)",
5103  range_type (0, lclNumRows),
5104  [&gblInds2D, &h_numRowEnt, &lclInds2D, &colMap] (const LO& lclRow, size_t& numErrs) {
5105  const GO* const curGblInds = gblInds2D[lclRow].getRawPtr ();
5106  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
5107  // number of entries in a row to LO, as long as the row
5108  // doesn't have too many duplicate entries.
5109  const LO rna = static_cast<LO> (gblInds2D[lclRow].size ());
5110  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
5111  lclInds2D[lclRow].resize (rna); // purely thread-local, so safe
5112  LO* const curLclInds = lclInds2D[lclRow].getRawPtr ();
5113  for (LO j = 0; j < numEnt; ++j) {
5114  const GO gid = curGblInds[j];
5115  const LO lid = colMap.getLocalElement (gid);
5116  curLclInds[j] = lid;
5117  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5118  ++numErrs;
5119  }
5120  }
5121  }, lclNumErrs);
5122 
5123  this->lclInds2D_ = lclInds2D; // "commit" the result
5124 
5125  // If we detected an error in the above loop, go back and find
5126  // the global column indices not in the column Map on the
5127  // calling process.
5128  if (lclNumErrs != 0) {
5129  const int myRank = [this] () {
5130  auto map = this->getMap ();
5131  if (map.is_null ()) {
5132  return 0;
5133  }
5134  else {
5135  auto comm = map->getComm ();
5136  return comm.is_null () ? 0 : comm->getRank ();
5137  }
5138  } ();
5139 
5140  // If there are too many errors, don't bother printing them.
5141  constexpr size_t tooManyErrsToPrint = 200; // arbitrary constant
5142  if (lclNumErrs > tooManyErrsToPrint) {
5143  errStrm << "(Process " << myRank << ") When converting column "
5144  "indices from global to local, we encountered " << lclNumErrs
5145  << " indices that do not live in the column Map on this "
5146  "process. That's too many to print." << endl;
5147  }
5148  else {
5149  // Map from local row index, to any global column indices
5150  // that do not live in the column Map on the calling process.
5151  std::map<LO, std::vector<GO> > badColInds;
5152  // List of local rows lclRow for which h_numRowEnt[lclRow]
5153  // > gblInds2D_[lclRow].size().
5154  std::vector<LO> badLclRows;
5155 
5156  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
5157  const size_t numEnt = static_cast<size_t> (h_numRowEnt[lclRow]);
5158 
5159  Teuchos::ArrayView<const GO> curGblInds = gblInds2D_[lclRow] ();
5160  if (numEnt > static_cast<size_t> (curGblInds.size ())) {
5161  badLclRows.push_back (lclRow);
5162  }
5163  else {
5164  for (size_t j = 0; j < numEnt; ++j) {
5165  const GO gid = curGblInds[j];
5166  const LO lid = colMap.getLocalElement (gid);
5167  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5168  badColInds[lclRow].push_back (gid);
5169  }
5170  }
5171  }
5172  }
5173 
5174  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5175  errStrm << "(Process " << myRank << ") When converting column "
5176  "indices from global to local, we encountered " << lclNumErrs
5177  << " ind" << (pluralNumErrs ? "ices" : "ex") << " that "
5178  "do" << (pluralNumErrs ? "es" : "")
5179  << " not live in the column Map on this process." << endl
5180  << "(Process " << myRank << ") Here are the bad global "
5181  "indices, listed by local row: " << endl;
5182  for (auto && eachPair : badColInds) {
5183  const LO lclRow = eachPair.first;
5184  const GO gblRow = rowMap_->getGlobalElement (lclRow);
5185  errStrm << "(Process " << myRank << ") Local row " << lclRow
5186  << " (global row " << gblRow << "): [";
5187  const size_t numBad = eachPair.second.size ();
5188  for (size_t k = 0; k < numBad; ++k) {
5189  errStrm << eachPair.second[k];
5190  if (k + size_t (1) < numBad) {
5191  errStrm << ",";
5192  }
5193  }
5194  errStrm << "]" << endl;
5195  }
5196 
5197  if (badLclRows.size () != 0) {
5198  if (lclNumErrs == 0) {
5199  // We really want lclNumErrs to be just the count of
5200  // bad column indices, but lclNumErrs != 0 also
5201  // doubles as a generic indication of error.
5202  lclNumErrs = badLclRows.size ();
5203  }
5204 
5205  errStrm << "(Process " << myRank << ") When converting column "
5206  "indices from global to local, we (also) encountered the "
5207  "following local rows lclRow on this process for which "
5208  "h_numRowEnt[lclRow] > gblInds2D_[lclRow].size(). This "
5209  "likely indicates a bug in Tpetra." << endl
5210  << "(Process " << myRank << ") [";
5211  const size_t numBad = badLclRows.size ();
5212  for (size_t k = 0; k < numBad; ++k) {
5213  const LO lclRow = badLclRows[k];
5214  errStrm << "{lclRow: " << lclRow
5215  << "h_numRowEnt[lclRow]: " << h_numRowEnt[lclRow]
5216  << "gblInds2D_[lclRow].size(): "
5217  << gblInds2D_[lclRow].size () << "}";
5218  if (k + size_t (1) < numBad) {
5219  errStrm << ", ";
5220  }
5221  }
5222  errStrm << "]" << endl;
5223  }
5224  }
5225  }
5226 
5227  this->gblInds2D_ = Teuchos::null;
5228  }
5229  } // globallyIndexed() && lclNumRows > 0
5230 
5231  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
5232  this->indicesAreLocal_ = true;
5233  this->indicesAreGlobal_ = false;
5234  this->checkInternalState ();
5235 
5236  return std::make_pair (lclNumErrs, errStrm.str ());
5237  }
5238 
5239 
// Build (or rebuild) the graph's column Map by calling
// ::Tpetra::Details::makeColMap, then store the result in colMap_ and
// call checkInternalState().
//
// \param remotePIDs [in/out] Passed through unchanged to
//   ::Tpetra::Details::makeColMap.  Presumably filled with the owning
//   process ranks of remote column indices -- TODO confirm against
//   that function's documentation.
//
// \throw std::runtime_error in debug mode only, if
//   ::Tpetra::Details::makeColMap returned a nonzero error code on
//   any process in the graph's communicator.  The exception message
//   contains the error output gathered from all processes.
//
// In non-debug mode the error code is discarded.  In debug mode with
// a null communicator the error code is computed but not checked.
5240  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5241  void
5243  makeColMap (Teuchos::Array<int>& remotePIDs)
5244  {
5245  using ::Tpetra::Details::ProfilingRegion;
5246  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
5247  const bool debug = ::Tpetra::Details::Behavior::debug ();
5248 
5249  // this->colMap_ should be null at this point, but we accept the
5250  // future possibility that it might not be (esp. if we decide
5251  // later to support graph structure changes after first
5252  // fillComplete, which CrsGraph does not currently (as of 12 Feb
5253  // 2017) support).
5254  Teuchos::RCP<const map_type> colMap = this->colMap_;
5255  const bool sortEachProcsGids =
5256  this->sortGhostsAssociatedWithEachProcessor_;
5257 
5258  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
5259  // per-process error code. If an error does occur on a process,
5260  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
5261  // notice that error. This is the caller's responsibility. For
5262  // now, we only propagate (to all processes) and report the error
5263  // in debug mode. In the future, we need to add the local/global
5264  // error handling scheme used in BlockCrsMatrix to this class.
5265  if (debug) {
5266  using Teuchos::outArg;
5267  using Teuchos::REDUCE_MIN;
5268  using Teuchos::reduceAll;
5269  const char tfecfFuncName[] = "makeColMap: ";
5270 
5271  std::ostringstream errStrm;
5272  const int lclErrCode =
5273  ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5274  *this, sortEachProcsGids, &errStrm);
5275  auto comm = this->getComm ();
5276  if (! comm.is_null ()) {
// Min-reduce a 0/1 success flag, so that every process learns
// whether any process failed.
5277  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
5278  int gblSuccess = 0; // output argument
5279  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
5280  outArg (gblSuccess));
5281  if (gblSuccess != 1) {
5282  std::ostringstream os;
5283  Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
5284  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5285  (true, std::runtime_error, "An error happened on at least one "
5286  "(MPI) process in the CrsGraph's communicator. Here are all "
5287  "processes' error messages:" << std::endl << os.str ());
5288  }
5289  }
5290  }
5291  else {
// Non-debug mode: no error stream, and the error code is ignored.
5292  (void) ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5293  *this, sortEachProcsGids, nullptr);
5294  }
5295  // See above. We want to admit the possibility of makeColMap
5296  // actually revising an existing column Map, even though that
5297  // doesn't currently (as of 10 May 2017) happen.
5298  this->colMap_ = colMap;
5299 
5300  checkInternalState ();
5301  }
5302 
5303 
// Sort and/or merge the column indices in every local row, as needed.
//
// \param sorted [in] Whether the rows are already sorted.
// \param merged [in] Whether the rows are already merged (free of
//   duplicate indices).
//
// If both arguments are true, this method does nothing beyond its
// checks.  Otherwise it calls sortAndMergeRowIndices on every local
// row in a host-parallel loop, then sets indicesAreSorted_ and
// noRedundancies_ to true.
//
// \throw std::logic_error if the graph is globally indexed, or if
//   merged is false while the graph is already storage optimized.
5304  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5305  void
5307  sortAndMergeAllIndices (const bool sorted, const bool merged)
5308  {
5309  using ::Tpetra::Details::ProfilingRegion;
5310  typedef LocalOrdinal LO;
5311  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5312  host_execution_space;
5313  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5314  const char tfecfFuncName[] = "sortAndMergeAllIndices: ";
5315  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices");
5316 
5317  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5318  (this->isGloballyIndexed (), std::logic_error,
5319  "This method may only be called after makeIndicesLocal." );
5320 
5321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5322  (! merged && this->isStorageOptimized (), std::logic_error,
5323  "The graph is already storage optimized, so we shouldn't be merging any "
5324  "indices. Please report this bug to the Tpetra developers.");
5325 
5326  if (! sorted || ! merged) {
5327  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
// Sum of the values returned by sortAndMergeRowIndices over all
// rows.  It is accumulated by the reduction but not read afterwards
// in this method.
5328  size_t totalNumDups = 0;
5329  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
5330  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5331  [this, sorted, merged] (const LO& lclRow, size_t& numDups) {
5332  const RowInfo rowInfo = this->getRowInfo (lclRow);
5333  numDups += this->sortAndMergeRowIndices (rowInfo, sorted, merged);
5334  }, totalNumDups);
5335  this->indicesAreSorted_ = true; // we just sorted every row
5336  this->noRedundancies_ = true; // we just merged every row
5337  }
5338  }
5339 
5340 
// Create the graph's Import and Export objects, if they do not exist
// yet and are needed.
//
// \param remotePIDs [in/out] Forwarded to the Import constructor when
//   useRemotePIDs is true; otherwise unused.
// \param useRemotePIDs [in] Whether to pass remotePIDs to the Import
//   constructor.
//
// An Import (domain Map to column Map) is created only if importer_
// is null and the two Maps are neither the same object nor "the same"
// according to isSameAs.  Likewise, an Export (row Map to range Map)
// is created only if exporter_ is null and the range and row Maps
// differ.  If the graph's parameter list has an "Import" resp.
// "Export" sublist, that sublist is passed to the constructor.
//
// \throw std::logic_error if the graph has no column Map.
5340  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5341  void
5343  makeImportExport (Teuchos::Array<int>& remotePIDs,
5344  const bool useRemotePIDs)
5345  {
5346  using ::Tpetra::Details::ProfilingRegion;
5347  using Teuchos::ParameterList;
5348  using Teuchos::RCP;
5349  using Teuchos::rcp;
5350  const char tfecfFuncName[] = "makeImportExport: ";
5351  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
5352 
5353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5354  (! this->hasColMap (), std::logic_error,
5355  "This method may not be called unless the graph has a column Map.");
5356  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5357 
5358  // Don't do any checks to see if we need to create the Import, if
5359  // it exists already.
5360  //
5361  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5362  // change CrsGraph in the future to allow changing the column
5363  // Map after fillComplete. For now, the column Map is fixed
5364  // after the first fillComplete call.
5365  if (importer_.is_null ()) {
5366  // Create the Import instance if necessary.
// The pointer comparison is a cheap shortcut that skips the isSameAs
// call when both RCPs refer to the same Map object.
5367  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5368  if (params.is_null () || ! params->isSublist ("Import")) {
5369  if (useRemotePIDs) {
5370  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
5371  }
5372  else {
5373  importer_ = rcp (new import_type (domainMap_, colMap_));
5374  }
5375  }
5376  else {
5377  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5378  if (useRemotePIDs) {
5379  RCP<import_type> newImp =
5380  rcp (new import_type (domainMap_, colMap_, remotePIDs,
5381  importSublist));
5382  importer_ = newImp;
5383  }
5384  else {
5385  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5386  }
5387  }
5388  }
5389  }
5390 
5391  // Don't do any checks to see if we need to create the Export, if
5392  // it exists already.
5393  if (exporter_.is_null ()) {
5394  // Create the Export instance if necessary.
5395  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5396  if (params.is_null () || ! params->isSublist ("Export")) {
5397  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5398  }
5399  else {
5400  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5401  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5402  }
5403  }
5404  }
5405  }
5407 
5408 
// One-line description of the graph.  (The listing dropped this
// method's signature lines; presumably this is
// CrsGraph::description() const -- TODO confirm.)
//
// \return The base class' description, followed by a brace-enclosed
//   status summary.  If the graph is fill complete, the summary
//   includes the global numbers of rows, columns, and entries;
//   otherwise it includes only the global number of rows.
5409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5410  std::string
5413  {
5414  std::ostringstream oss;
5415  oss << dist_object_type::description ();
5416  if (isFillComplete ()) {
5417  oss << "{status = fill complete"
5418  << ", global rows = " << getGlobalNumRows()
5419  << ", global cols = " << getGlobalNumCols()
5420  << ", global num entries = " << getGlobalNumEntries()
5421  << "}";
5422  }
5423  else {
5424  oss << "{status = fill not complete"
5425  << ", global rows = " << getGlobalNumRows()
5426  << "}";
5427  }
5428  return oss.str();
5429  }
5430 
5431 
// Print a description of the graph to the given output stream, with
// an amount of detail controlled by the verbosity level.
//
// \param out [out] Output stream to which to print.
// \param verbLevel [in] Verbosity level.  VERB_DEFAULT is treated as
//   VERB_LOW.  VERB_NONE prints nothing.
//
// This method calls comm->barrier() whenever the verbosity level is
// VERB_MEDIUM or higher, so in that case all processes in the graph's
// communicator must call it.
5432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5433  void
5435  describe (Teuchos::FancyOStream &out,
5436  const Teuchos::EVerbosityLevel verbLevel) const
5437  {
5438  using Teuchos::ArrayView;
5439  using Teuchos::Comm;
5440  using Teuchos::RCP;
5441  using Teuchos::VERB_DEFAULT;
5442  using Teuchos::VERB_NONE;
5443  using Teuchos::VERB_LOW;
5444  using Teuchos::VERB_MEDIUM;
5445  using Teuchos::VERB_HIGH;
5446  using Teuchos::VERB_EXTREME;
5447  using std::endl;
5448  using std::setw;
5449 
5450  Teuchos::EVerbosityLevel vl = verbLevel;
5451  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5452  RCP<const Comm<int> > comm = this->getComm();
5453  const int myImageID = comm->getRank(),
5454  numImages = comm->getSize();
// Column width for the per-row table: the number of decimal digits
// in the global number of rows, but at least 11 (the length of the
// "Num Entries" heading), plus 2 for spacing.
5455  size_t width = 1;
5456  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5457  ++width;
5458  }
5459  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5460  Teuchos::OSTab tab (out);
5461  // none: print nothing
5462  // low: print O(1) info from node 0
5463  // medium: print O(P) info, num entries per node
5464  // high: print O(N) info, num entries per row
5465  // extreme: print O(NNZ) info: print graph indices
5466  //
5467  // for medium and higher, print constituent objects at specified verbLevel
5468  if (vl != VERB_NONE) {
5469  if (myImageID == 0) out << this->description() << std::endl;
5470  // O(1) globals, minus what was already printed by description()
5471  if (isFillComplete() && myImageID == 0) {
5472  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5473  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5474  }
5475  // constituent objects
5476  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5477  if (myImageID == 0) out << "\nRow map: " << std::endl;
5478  rowMap_->describe(out,vl);
5479  if (colMap_ != Teuchos::null) {
5480  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5481  colMap_->describe(out,vl);
5482  }
5483  if (domainMap_ != Teuchos::null) {
5484  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5485  domainMap_->describe(out,vl);
5486  }
5487  if (rangeMap_ != Teuchos::null) {
5488  if (myImageID == 0) out << "\nRange map: " << std::endl;
5489  rangeMap_->describe(out,vl);
5490  }
5491  }
5492  // O(P) data
5493  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
// Each process prints in turn, in rank order.
5494  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5495  if (myImageID == imageCtr) {
5496  out << "Node ID = " << imageCtr << std::endl
5497  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5498  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5499  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5500  if (! indicesAreAllocated ()) {
5501  out << "Indices are not allocated." << std::endl;
5502  }
5503  }
// NOTE(review): three consecutive barriers; presumably meant to give
// each process's output time to appear before the next process
// prints.  Barriers alone do not guarantee output ordering -- confirm
// intent.
5504  comm->barrier();
5505  comm->barrier();
5506  comm->barrier();
5507  }
5508  }
5509  // O(N) and O(NNZ) data
5510  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5511  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5512  if (myImageID == imageCtr) {
5513  out << std::setw(width) << "Node ID"
5514  << std::setw(width) << "Global Row"
5515  << std::setw(width) << "Num Entries";
5516  if (vl == VERB_EXTREME) {
5517  out << " Entries";
5518  }
5519  out << std::endl;
5520  const LocalOrdinal lclNumRows =
5521  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5522  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5523  const RowInfo rowinfo = this->getRowInfo (r);
5524  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5525  out << std::setw(width) << myImageID
5526  << std::setw(width) << gid
5527  << std::setw(width) << rowinfo.numEntries;
5528  if (vl == VERB_EXTREME) {
5529  out << " ";
// Column indices are always printed as global indices; local indices
// are converted through the column Map.  If the graph is neither
// globally nor locally indexed, no indices are printed.
5530  if (isGloballyIndexed()) {
5531  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5532  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5533  }
5534  else if (isLocallyIndexed()) {
5535  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5536  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5537  }
5538  }
5539  out << std::endl;
5540  }
5541  }
5542  comm->barrier();
5543  comm->barrier();
5544  comm->barrier();
5545  }
5546  }
5547  }
5548  }
5549 
5550 
// Compatibility check between this graph and the source object of an
// Import or Export.  The argument is unused, and the method returns
// true unconditionally; in particular, it does not check the type of
// the source object.
//
// \return Always true.
5551  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5552  bool
5554  checkSizes (const SrcDistObject& /* source */)
5555  {
5556  // It's not clear what kind of compatibility checks on sizes can
5557  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5558  // compatibility.
5559  return true;
5560  }
5561 
// Copy rows of the source graph into this (target) graph: first the
// rows that have the same local index in source and target ("copy"),
// then the rows whose local index differs ("permute").  The method is
// named copyAndPermuteNew if TPETRA_ENABLE_DEPRECATED_CODE is
// defined, else copyAndPermute.
//
// \param source [in] Source object; must be a RowGraph with matching
//   template parameters.
// \param numSameIDs [in] Number of leading local rows (local indices
//   0, 1, ..., numSameIDs-1 of the source row Map) to copy into the
//   target row with the same global index.
// \param permuteToLIDs [in] Local row indices in the target graph.
// \param permuteFromLIDs [in] Local row indices in the source graph;
//   entry i is copied into the target row permuteToLIDs[i].
//
// \throw std::runtime_error if permuteToLIDs and permuteFromLIDs have
//   different lengths.
//
// All insertions go through insertGlobalIndices.  If the source is a
// CrsGraph that is not fill complete, rows are read through views
// (getGlobalRowView); otherwise they are read through copies
// (getGlobalRowCopy).
5562  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5563  void
5565 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
5566  copyAndPermuteNew
5567 #else // TPETRA_ENABLE_DEPRECATED_CODE
5568  copyAndPermute
5569 #endif // TPETRA_ENABLE_DEPRECATED_CODE
5570  (const SrcDistObject& source,
5571  const size_t numSameIDs,
5572  const Kokkos::DualView<const local_ordinal_type*,
5573  buffer_device_type>& permuteToLIDs,
5574  const Kokkos::DualView<const local_ordinal_type*,
5575  buffer_device_type>& permuteFromLIDs)
5576  {
5577  using std::endl;
5578  using LO = local_ordinal_type;
5579  using GO = global_ordinal_type;
5580  using this_type = CrsGraph<LO, GO, node_type>;
5581  using row_graph_type = RowGraph<LO, GO, node_type>;
5582  const char tfecfFuncName[] = "copyAndPermute: ";
5583  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
5584 
// In debug mode, build a per-process prefix for the diagnostic
// messages below.  Each message is assembled in its own string stream
// and written to std::cerr with a single insertion.
5585  std::unique_ptr<std::string> prefix;
5586  if (debug) {
5587  std::ostringstream os;
5588  const int myRank = this->getMap ()->getComm ()->getRank ();
5589  os << "Proc " << myRank << ": Tpetra::CrsGraph::copyAndPermute: ";
5590  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
5591  os << endl;
5592  std::cerr << os.str ();
5593  }
5594 
5595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5596  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5597  std::runtime_error, "permuteToLIDs.extent(0) = "
5598  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
5599  << permuteFromLIDs.extent (0) << ".");
5600 
5601  // We know from checkSizes that the source object is a
5602  // row_graph_type, so we don't need to check again.
// NOTE(review): the checkSizes definition visible earlier in this
// file returns true unconditionally, so it does not verify the type
// of source.  The reference dynamic_cast below throws std::bad_cast
// if source is not a row_graph_type.
5603  const row_graph_type& srcRowGraph =
5604  dynamic_cast<const row_graph_type&> (source);
5605 
5606  if (this->getProfileType () == StaticProfile) {
5607  if (debug) {
5608  std::ostringstream os;
5609  os << *prefix << "Target is StaticProfile; do CRS padding" << endl;
5610  std::cerr << os.str ();
5611  }
5612  auto padding = computeCrsPaddingNew (srcRowGraph, numSameIDs,
5613  permuteToLIDs, permuteFromLIDs);
5614  this->applyCrsPadding (padding);
5615  }
5616  else if (debug) {
5617  std::ostringstream os;
5618  os << *prefix << "Target is DynamicProfile" << endl;
5619  std::cerr << os.str ();
5620  }
5621 
5622  // If the source object is actually a CrsGraph, we can use view
5623  // mode instead of copy mode to access the entries in each row,
5624  // if the graph is not fill complete.
5625  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5626 
5627  const map_type& srcRowMap = * (srcRowGraph.getRowMap ());
5628  const map_type& tgtRowMap = * (this->getRowMap ());
5629  const bool src_filled = srcRowGraph.isFillComplete ();
// Scratch buffer for copy mode; it is resized for each row.
5630  Teuchos::Array<GO> row_copy;
5631  LO myid = 0;
5632 
5633  //
5634  // "Copy" part of "copy and permute."
5635  //
5636  if (src_filled || srcCrsGraph == nullptr) {
5637  if (debug) {
5638  std::ostringstream os;
5639  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5640  std::cerr << os.str ();
5641  }
5642  // If the source graph is fill complete, we can't use view mode,
5643  // because the data might be stored in a different format not
5644  // compatible with the expectations of view mode. Also, if the
5645  // source graph is not a CrsGraph, we can't use view mode,
5646  // because RowGraph only provides copy mode access to the data.
5647  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5648  const GO gid = srcRowMap.getGlobalElement (myid);
5649  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5650  row_copy.resize (row_length);
// check_row_length receives the number of entries written by
// getGlobalRowCopy; it is not compared against row_length here.
5651  size_t check_row_length = 0;
5652  srcRowGraph.getGlobalRowCopy (gid, row_copy (), check_row_length);
5653  this->insertGlobalIndices (gid, row_copy ());
5654  }
5655  } else {
5656  if (debug) {
5657  std::ostringstream os;
5658  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5659  std::cerr << os.str ();
5660  }
5661  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5662  const GO gid = srcRowMap.getGlobalElement (myid);
5663  Teuchos::ArrayView<const GO> row;
5664  srcCrsGraph->getGlobalRowView (gid, row);
5665  this->insertGlobalIndices (gid, row);
5666  }
5667  }
5668 
5669  //
5670  // "Permute" part of "copy and permute."
5671  //
// The permutation lists are read on host.
5672  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5673  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5674 
5675  if (src_filled || srcCrsGraph == nullptr) {
5676  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5677  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5678  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5679  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5680  row_copy.resize (row_length);
5681  size_t check_row_length = 0;
5682  srcRowGraph.getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5683  this->insertGlobalIndices (mygid, row_copy ());
5684  }
5685  } else {
5686  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5687  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5688  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5689  Teuchos::ArrayView<const GO> row;
5690  srcCrsGraph->getGlobalRowView (srcgid, row);
5691  this->insertGlobalIndices (mygid, row);
5692  }
5693  }
5694 
5695  if (debug) {
5696  std::ostringstream os;
5697  os << *prefix << "Done" << endl;
5698  std::cerr << os.str ();
5699  }
5700  }
5701 
5702  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5703  void
5704  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5705  applyCrsPadding(const Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>& padding)
5706  {
5707  // const char tfecfFuncName[] = "applyCrsPadding";
5708  using execution_space = typename device_type::execution_space;
5709  using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type;
5710  using indices_type = t_GlobalOrdinal_1D;
5711  using local_indices_type = typename local_graph_type::entries_type::non_const_type;
5712  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LocalOrdinal>>;
5714 
5715  // Assume global indexing we don't have any indices yet
5716  if (! this->indicesAreAllocated()) {
5717  allocateIndices(GlobalIndices);
5718  }
5719 
5720  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5721  // would use it directly.
5722 
5723  row_ptrs_type row_ptrs_beg("row_ptrs_beg", this->k_rowPtrs_.extent(0));
5724  Kokkos::deep_copy(row_ptrs_beg, this->k_rowPtrs_);
5725 
5726  const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1);
5727  row_ptrs_type row_ptrs_end("row_ptrs_end", N);
5728 
5729  bool refill_num_row_entries = false;
5730  if (this->k_numRowEntries_.extent(0) > 0) {
5731  // Case 1: Unpacked storage
5732  refill_num_row_entries = true;
5733  auto num_row_entries = this->k_numRowEntries_;
5734  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5735  KOKKOS_LAMBDA(const size_t i){
5736  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5737  }
5738  );
5739 
5740  } else {
5741  // mfh If packed storage, don't need row_ptrs_end to be separate allocation;
5742  // could just have it alias row_ptrs_beg+1.
5743  // Case 2: Packed storage
5744  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5745  KOKKOS_LAMBDA(const size_t i){
5746  row_ptrs_end(i) = row_ptrs_beg(i+1);
5747  }
5748  );
5749  }
5750 
5751  if(this->isGloballyIndexed()) {
5752  indices_type indices("indices", this->k_gblInds1D_.extent(0));
5753  Kokkos::deep_copy(indices, this->k_gblInds1D_);
5754  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5755  padCrsArrays<row_ptrs_type,indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5756  this->k_gblInds1D_ = indices;
5757  }
5758  else {
5759  local_indices_type indices("indices", this->k_lclInds1D_.extent(0));
5760  Kokkos::deep_copy(indices, this->k_lclInds1D_);
5761  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5762  padCrsArrays<row_ptrs_type,local_indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5763  this->k_lclInds1D_ = indices;
5764  }
5765 
5766 
5767  if (refill_num_row_entries) {
5768  auto num_row_entries = this->k_numRowEntries_;
5769  Kokkos::parallel_for("Fill num entries", range_policy(0, N),
5770  KOKKOS_LAMBDA(const size_t i){
5771  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5772  }
5773  );
5774  }
5775  this->k_rowPtrs_ = row_ptrs_beg;
5776  }
5777 
5778  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5779  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5780  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5781  computeCrsPadding (const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5782  size_t numSameIDs,
5783  const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
5784  const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
5785  {
5786  using LO = LocalOrdinal;
5787  using GO = GlobalOrdinal;
5788  using execution_space = typename device_type::execution_space;
5789  const char tfecfFuncName[] = "computeCrsPadding";
5790 
5791  // Resize row pointers and indices to accommodate incoming data
5792  execution_space::fence (); // Make sure device sees changes made by host
5793  const map_type& src_row_map = *(source.getRowMap());
5794  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5795  padding_type padding(numSameIDs+permuteFromLIDs.size());
5796  for (LO tgtid=0; tgtid<static_cast<LO>(numSameIDs); ++tgtid) {
5797  const GO srcgid = src_row_map.getGlobalElement(tgtid);
5798  auto how_much_padding = source.getNumEntriesInGlobalRow(srcgid);
5799  auto result = padding.insert(tgtid, how_much_padding);
5800  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(result.failed(), std::runtime_error,
5801  "unable to insert padding for LID " << tgtid);
5802  }
5803  for (LO i=0; i<permuteToLIDs.size(); ++i) {
5804  const LO tgtid = permuteToLIDs[i];
5805  const GO srcgid = src_row_map.getGlobalElement(permuteFromLIDs[i]);
5806  auto how_much_padding = source.getNumEntriesInGlobalRow(srcgid);
5807  auto result = padding.insert(tgtid, how_much_padding);
5808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(result.failed(), std::runtime_error,
5809  "unable to insert padding for LID " << tgtid);
5810  }
5811  execution_space::fence (); // Make sure device sees changes made by host
5812  TEUCHOS_TEST_FOR_EXCEPTION(padding.failed_insert(), std::runtime_error,
5813  "failed to insert one or more indices in to padding map");
5814  return padding;
5815  }
5816 
5817  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5818  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5819  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5820  computeCrsPaddingNew (const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5821  const size_t numSameIDs,
5822  const Kokkos::DualView<const local_ordinal_type*,
5823  buffer_device_type>& permuteToLIDs,
5824  const Kokkos::DualView<const local_ordinal_type*,
5825  buffer_device_type>& permuteFromLIDs)
5826  {
5827  using LO = LocalOrdinal;
5828  using GO = GlobalOrdinal;
5829  using execution_space = typename device_type::execution_space;
5830  const char tfecfFuncName[] = "computeCrsPaddingNew: ";
5831 
5832  execution_space::fence ();
5833 
5834  // Resize row pointers and indices to accommodate incoming data
5835  const map_type& src_row_map = * (source.getRowMap ());
5836  using padding_type = Kokkos::UnorderedMap<LO, size_t, device_type>;
5837  padding_type padding (numSameIDs + permuteFromLIDs.extent (0));
5838  for (LO tgtid = 0; tgtid < static_cast<LO> (numSameIDs); ++tgtid) {
5839  const GO srcgid = src_row_map.getGlobalElement (tgtid);
5840  auto how_much_padding = source.getNumEntriesInGlobalRow (srcgid);
5841  auto result = padding.insert (tgtid, how_much_padding);
5842  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to
5843  // fail even if the user did nothing wrong. We should actually
5844  // have a retry option. I just copied this code over from
5845  // computeCrsPadding.
5846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5847  (result.failed(), std::runtime_error,
5848  "unable to insert padding for LID " << tgtid);
5849  }
5850 
5851  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5852  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5853 
5854  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5855  const LO tgtid = permuteToLIDs_h[i];
5856  const GO srcgid = src_row_map.getGlobalElement (permuteFromLIDs_h[i]);
5857  auto how_much_padding = source.getNumEntriesInGlobalRow (srcgid);
5858  auto result = padding.insert (tgtid, how_much_padding);
5859  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to
5860  // fail even if the user did nothing wrong. We should actually
5861  // have a retry option. I just copied this code over from
5862  // computeCrsPadding.
5863  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5864  (result.failed(), std::runtime_error,
5865  "unable to insert padding for LID " << tgtid);
5866  }
5867  execution_space::fence (); // Make sure device sees changes made by host
5868  TEUCHOS_TEST_FOR_EXCEPTION
5869  (padding.failed_insert(), std::runtime_error,
5870  "failed to insert one or more indices in to padding map");
5871  return padding;
5872  }
5873 
5874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5875  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5876  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5877  computeCrsPadding (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
5878  const Teuchos::ArrayView<size_t> &numPacketsPerLID)
5879  {
5880  using execution_space = typename device_type::execution_space;
5881  const char tfecfFuncName[] = "computeCrsPadding";
5882  // Creating padding for each new incoming index
5883  execution_space::fence (); // Make sure device sees changes made by host
5884  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5885  padding_type padding(importLIDs.size());
5886  auto numEnt = static_cast<size_t>(importLIDs.size());
5887  for (size_t i=0; i<numEnt; i++) {
5888  auto result = padding.insert(importLIDs[i], numPacketsPerLID[i]);
5889  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(result.failed(), std::runtime_error,
5890  "unable to insert padding for LID " << importLIDs[i]);
5891  }
5892  execution_space::fence (); // Make sure device sees changes made by host
5893  TEUCHOS_TEST_FOR_EXCEPTION(padding.failed_insert(), std::runtime_error,
5894  "failed to insert one or more indices in to padding map");
5895  return padding;
5896  }
5897 
5898  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5899  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5900  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5901  computeCrsPaddingNew (const Kokkos::DualView<const local_ordinal_type*,
5902  buffer_device_type>& importLIDs,
5903  Kokkos::DualView<size_t*,
5904  buffer_device_type> numPacketsPerLID) const
5905  {
5906  using execution_space = typename device_type::execution_space;
5907  const char tfecfFuncName[] = "computeCrsPaddingNew: ";
5908 
5909  // Creating padding for each new incoming index
5910  execution_space::fence (); // Make sure device sees changes made by host
5911  using padding_type =
5912  Kokkos::UnorderedMap<local_ordinal_type, size_t, device_type>;
5913  padding_type padding (importLIDs.extent (0));
5914  auto numEnt = static_cast<size_t> (importLIDs.extent (0));
5915 
5916  auto importLIDs_h = importLIDs.view_host ();
5917  if (numPacketsPerLID.need_sync_host ()) {
5918  numPacketsPerLID.sync_host ();
5919  }
5920  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5921 
5922  for (size_t i = 0; i < numEnt; ++i) {
5923  auto result = padding.insert (importLIDs_h[i], numPacketsPerLID_h[i]);
5924  // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingNew overload.
5925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5926  (result.failed(), std::runtime_error,
5927  "unable to insert padding for LID " << importLIDs_h[i]);
5928  }
5929 
5930  TEUCHOS_TEST_FOR_EXCEPTION
5931  (padding.failed_insert(), std::runtime_error,
5932  "failed to insert one or more indices in to padding map");
5933  return padding;
5934  }
5935 
5936  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5937  void
5938  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5939 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
5940  packAndPrepareNew
5941 #else // TPETRA_ENABLE_DEPRECATED_CODE
5942  packAndPrepare
5943 #endif // TPETRA_ENABLE_DEPRECATED_CODE
5944  (const SrcDistObject& source,
5945  const Kokkos::DualView<const local_ordinal_type*,
5946  buffer_device_type>& exportLIDs,
5947  Kokkos::DualView<packet_type*,
5948  buffer_device_type>& exports,
5949  Kokkos::DualView<size_t*,
5950  buffer_device_type> numPacketsPerLID,
5951  size_t& constantNumPackets,
5952  Distributor& distor)
5953  {
5955  using GO = global_ordinal_type;
5956  using std::endl;
5957  using crs_graph_type =
5958  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5959  using row_graph_type =
5960  RowGraph<local_ordinal_type, global_ordinal_type, node_type>;
5961  const char tfecfFuncName[] = "packAndPrepare: ";
5962  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5963 
5964  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
5965  std::unique_ptr<std::string> prefix;
5966  if (debug) {
5967  std::ostringstream os;
5968  const int myRank = this->getMap ()->getComm ()->getRank ();
5969  os << "Proc " << myRank << ": Tpetra::CrsGraph::packAndPrepare: ";
5970  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
5971  os << "Start" << endl;
5972  std::cerr << os.str ();
5973  }
5974 
5975  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5976  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5977  std::runtime_error,
5978  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5979  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5980  << ".");
5981  const row_graph_type* srcRowGraphPtr =
5982  dynamic_cast<const row_graph_type*> (&source);
5983  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5984  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5985  "or Import operation to a CrsGraph must be a RowGraph with the same "
5986  "template parameters.");
5987  // We don't check whether src_graph has had fillComplete called,
5988  // because it doesn't matter whether the *source* graph has been
5989  // fillComplete'd. The target graph can not be fillComplete'd yet.
5990  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5991  (this->isFillComplete (), std::runtime_error,
5992  "The target graph of an Import or Export must not be fill complete.");
5993 
5994  const crs_graph_type* srcCrsGraphPtr =
5995  dynamic_cast<const crs_graph_type*> (&source);
5996 
5997  if (srcCrsGraphPtr == nullptr) {
5998  using Teuchos::ArrayView;
5999  using LO = local_ordinal_type;
6000 
6001  if (debug) {
6002  std::ostringstream os;
6003  os << *prefix << "Source is a RowGraph but not a CrsGraph" << endl;
6004  std::cerr << os.str ();
6005  }
6006  // RowGraph::pack serves the "old" DistObject interface. It
6007  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
6008  // entails deep-copying the exports buffer on output. RowGraph
6009  // is a convenience interface when not a CrsGraph, so we accept
6010  // the performance hit.
6011  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6012  auto exportLIDs_h = exportLIDs.view_host ();
6013  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6014  exportLIDs_h.extent (0));
6015  Teuchos::Array<GO> exports_a;
6016 
6017  numPacketsPerLID.clear_sync_state ();
6018  numPacketsPerLID.modify_host ();
6019  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6020  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6021  numPacketsPerLID_h.extent (0));
6022  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6023  constantNumPackets, distor);
6024  const size_t newSize = static_cast<size_t> (exports_a.size ());
6025  if (static_cast<size_t> (exports.extent (0)) != newSize) {
6026  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
6027  exports = exports_dv_type ("exports", newSize);
6028  }
6029  Kokkos::View<const packet_type*, Kokkos::HostSpace,
6030  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
6031  exports.clear_sync_state ();
6032  exports.modify_host ();
6033  Kokkos::deep_copy (exports.view_host (), exports_a_h);
6034  }
6035  // packCrsGraphNew requires a valid localGraph.
6036  else if (! getColMap ().is_null () &&
6037  (lclGraph_.row_map.extent (0) != 0 ||
6038  getRowMap ()->getNodeNumElements () == 0)) {
6039  if (debug) {
6040  std::ostringstream os;
6041  os << *prefix << "packCrsGraphNew path" << endl;
6042  std::cerr << os.str ();
6043  }
6044  using export_pids_type =
6045  Kokkos::DualView<const int*, buffer_device_type>;
6046  export_pids_type exportPIDs; // not filling it; needed for syntax
6047  using LO = local_ordinal_type;
6048  using NT = node_type;
6050  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
6051  exports, numPacketsPerLID,
6052  constantNumPackets, false, distor);
6053  }
6054  else {
6055  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
6056  constantNumPackets, distor);
6057  }
6058 
6059  if (debug) {
6060  std::ostringstream os;
6061  os << *prefix << "Done" << endl;
6062  std::cerr << os.str ();
6063  }
6064  }
6065 
6066  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6067  void
6069  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6070  Teuchos::Array<GlobalOrdinal>& exports,
6071  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6072  size_t& constantNumPackets,
6073  Distributor& distor) const
6074  {
6075  auto col_map = this->getColMap();
6076  // packCrsGraph requires a valid localGraph.
6077  if( !col_map.is_null() && (lclGraph_.row_map.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
6079  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
6080  exportLIDs, constantNumPackets, distor);
6081  }
6082  else {
6083  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
6084  constantNumPackets, distor);
6085  }
6086  }
6087 
6088  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6089  void
6091  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6092  Teuchos::Array<GlobalOrdinal>& exports,
6093  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6094  size_t& constantNumPackets,
6095  Distributor& /* distor */) const
6096  {
6097  typedef LocalOrdinal LO;
6098  typedef GlobalOrdinal GO;
6099  typedef typename Kokkos::View<size_t*,
6100  device_type>::HostMirror::execution_space host_execution_space;
6101  typedef typename device_type::execution_space device_execution_space;
6102  const char tfecfFuncName[] = "packFillActive: ";
6103  const bool debug = ::Tpetra::Details::Behavior::debug("CrsGraph::pack");
6104  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6105 
6106  const auto numExportLIDs = exportLIDs.size ();
6107  if (debug) {
6108  std::ostringstream os;
6109  os << "Proc " << myRank << ": CrsGraph::pack: numExportLIDs = "
6110  << numExportLIDs << std::endl;
6111  std::cerr << os.str ();
6112  }
6113  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6114  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
6115  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
6116  " = " << numPacketsPerLID.size () << ".");
6117 
6118  // We may be accessing UVM data on host below, so ensure that the
6119  // device is done accessing it.
6120  device_execution_space::fence ();
6121 
6122  const map_type& rowMap = * (this->getRowMap ());
6123  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6125  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6126  "This graph claims to be locally indexed, but its column Map is nullptr. "
6127  "This should never happen. Please report this bug to the Tpetra "
6128  "developers.");
6129 
6130  // We may pack different amounts of data for different rows.
6131  constantNumPackets = 0;
6132 
6133  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
6134  // it might be now, but we might as well be safe).
6135  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
6136  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
6137 
6138  // Count the total number of packets (column indices, in the case
6139  // of a CrsGraph) to pack. While doing so, set
6140  // numPacketsPerLID[i] to the number of entries owned by the
6141  // calling process in (local) row exportLIDs[i] of the graph, that
6142  // the caller wants us to send out.
6143  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
6144  size_t totalNumPackets = 0;
6145  size_t errCount = 0;
6146  // lambdas turn what they capture const, so we can't
6147  // atomic_add(&errCount,1). Instead, we need a View to modify.
6148  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
6149  host_device_type;
6150  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6151  constexpr size_t ONE = 1;
6152 
6153  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
6154  inputRange,
6155  [=] (const LO& i, size_t& curTotalNumPackets) {
6156  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6157  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6158  Kokkos::atomic_add (&errCountView(), ONE);
6159  numPacketsPerLID_raw[i] = 0;
6160  }
6161  else {
6162  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6163  numPacketsPerLID_raw[i] = numEnt;
6164  curTotalNumPackets += numEnt;
6165  }
6166  },
6167  totalNumPackets);
6168 
6169  if (debug) {
6170  std::ostringstream os;
6171  os << "Proc " << myRank << ": CrsGraph::pack: "
6172  << "totalNumPackets = " << totalNumPackets << std::endl;
6173  std::cerr << os.str ();
6174  }
6175  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6176  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6177  "one or more errors! errCount = " << errCount
6178  << ", totalNumPackets = " << totalNumPackets << ".");
6179  errCount = 0;
6180 
6181  // Allocate space for all the column indices to pack.
6182  exports.resize (totalNumPackets);
6183 
6184  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6185  (! this->supportsRowViews (), std::logic_error,
6186  "this->supportsRowViews() returns false; this should never happen. "
6187  "Please report this bug to the Tpetra developers.");
6188 
6189  // Loop again over the rows to export, and pack rows of indices
6190  // into the output buffer.
6191 
6192  if (debug) {
6193  std::ostringstream os;
6194  os << "Proc " << myRank << ": CrsGraph::pack: pack into exports" << std::endl;
6195  std::cerr << os.str ();
6196  }
6197 
6198  // Teuchos::ArrayView may not be thread safe, or may not be
6199  // efficiently thread safe. Better to use the raw pointer.
6200  GO* const exports_raw = exports.getRawPtr ();
6201  errCount = 0;
6202  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
6203  inputRange,
6204  [=] (const LO& i, size_t& exportsOffset, const bool final) {
6205  const size_t curOffset = exportsOffset;
6206  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6207  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6208 
6209  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
6210  if (debug) {
6211  std::ostringstream os;
6212  os << "Proc " << myRank << ": INVALID rowInfo: "
6213  << "i = " << i << ", lclRow = " << exportLIDs_raw[i] << std::endl;
6214  std::cerr << os.str ();
6215  }
6216  Kokkos::atomic_add (&errCountView(), ONE);
6217  }
6218  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6219  if (debug) {
6220  std::ostringstream os;
6221  os << "Proc " << myRank << ": UH OH! For i=" << i << ", lclRow="
6222  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
6223  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6224  << ") > totalNumPackets (= " << totalNumPackets << ")."
6225  << std::endl;
6226  std::cerr << os.str ();
6227  }
6228  Kokkos::atomic_add (&errCountView(), ONE);
6229  }
6230  else {
6231  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6232  if (this->isLocallyIndexed ()) {
6233  const LO* lclColInds = nullptr;
6234  LO capacity = 0;
6235  const LO errCode =
6236  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6237  if (errCode == 0) {
6238  if (final) {
6239  for (LO k = 0; k < numEnt; ++k) {
6240  const LO lclColInd = lclColInds[k];
6241  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6242  // Pack it, even if it's wrong. Let the receiving
6243  // process deal with it. Otherwise, we'll miss out
6244  // on any correct data.
6245  exports_raw[curOffset + k] = gblColInd;
6246  } // for each entry in the row
6247  } // final pass?
6248  exportsOffset = curOffset + numEnt;
6249  }
6250  else { // error in getting local row view
6251  Kokkos::atomic_add (&errCountView(), ONE);
6252  }
6253  }
6254  else if (this->isGloballyIndexed ()) {
6255  const GO* gblColInds = nullptr;
6256  LO capacity = 0;
6257  const LO errCode =
6258  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6259  if (errCode == 0) {
6260  if (final) {
6261  for (LO k = 0; k < numEnt; ++k) {
6262  const GO gblColInd = gblColInds[k];
6263  // Pack it, even if it's wrong. Let the receiving
6264  // process deal with it. Otherwise, we'll miss out
6265  // on any correct data.
6266  exports_raw[curOffset + k] = gblColInd;
6267  } // for each entry in the row
6268  } // final pass?
6269  exportsOffset = curOffset + numEnt;
6270  }
6271  else { // error in getting global row view
6272  Kokkos::atomic_add (&errCountView(), ONE);
6273  }
6274  }
6275  // If neither globally nor locally indexed, then the graph
6276  // has no entries in this row (or indeed, in any row on this
6277  // process) to pack.
6278  }
6279  });
6280 
6281  // We may have accessed UVM data on host above, so ensure that the
6282  // device sees these changes.
6283  device_execution_space::fence ();
6284 
6285  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6286  (errCount != 0, std::logic_error, "Packing encountered "
6287  "one or more errors! errCount = " << errCount
6288  << ", totalNumPackets = " << totalNumPackets << ".");
6289  }
6290 
6291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6292  void
6293  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6294  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
6295  buffer_device_type>& exportLIDs,
6296  Kokkos::DualView<packet_type*,
6297  buffer_device_type>& exports,
6298  Kokkos::DualView<size_t*,
6299  buffer_device_type> numPacketsPerLID,
6300  size_t& constantNumPackets,
6301  Distributor& distor) const
6302  {
6303  using std::endl;
6304  using LO = local_ordinal_type;
6305  using GO = global_ordinal_type;
6306  using host_execution_space = typename Kokkos::View<size_t*,
6307  device_type>::HostMirror::execution_space;
6308  using host_device_type =
6309  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6310  using device_execution_space = typename device_type::execution_space;
6311  using exports_dv_type =
6312  Kokkos::DualView<packet_type*, buffer_device_type>;
6313  const char tfecfFuncName[] = "packFillActiveNew: ";
6314  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6315  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6316 
6317  std::unique_ptr<std::string> prefix;
6318  if (debug) {
6319  std::ostringstream os;
6320  os << "Proc " << myRank << ": Tpetra::CrsGraph::packFillActiveNew: ";
6321  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6322  os << "Start" << endl;
6323  std::cerr << os.str ();
6324  }
6325 
6326  const auto numExportLIDs = exportLIDs.extent (0);
6327  if (debug) {
6328  std::ostringstream os;
6329  os << *prefix << "numExportLIDs: " << numExportLIDs
6330  << ", numPacketsPerLID.extent(0): " << numPacketsPerLID.extent (0)
6331  << endl;
6332  std::cerr << os.str ();
6333  }
6334  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6335  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6336  "exportLIDs.extent(0) = " << numExportLIDs
6337  << " != numPacketsPerLID.extent(0) = "
6338  << numPacketsPerLID.extent (0) << ".");
6339  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6340  auto exportLIDs_h = exportLIDs.view_host ();
6341 
6342  // We may be accessing UVM data on host below, so ensure that the
6343  // device is done accessing it.
6344  device_execution_space::fence ();
6345 
6346  const map_type& rowMap = * (this->getRowMap ());
6347  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6348  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6349  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6350  "This graph claims to be locally indexed, but its column Map is nullptr. "
6351  "This should never happen. Please report this bug to the Tpetra "
6352  "developers.");
6353 
6354  // We may pack different amounts of data for different rows.
6355  constantNumPackets = 0;
6356 
6357  numPacketsPerLID.clear_sync_state ();
6358  numPacketsPerLID.modify_host ();
6359  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6360 
6361  // Count the total number of packets (column indices, in the case
6362  // of a CrsGraph) to pack. While doing so, set
6363  // numPacketsPerLID[i] to the number of entries owned by the
6364  // calling process in (local) row exportLIDs[i] of the graph, that
6365  // the caller wants us to send out.
6366  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6367  range_type inputRange (0, numExportLIDs);
6368  size_t totalNumPackets = 0;
6369  size_t errCount = 0;
6370  // lambdas turn what they capture const, so we can't
6371  // atomic_add(&errCount,1). Instead, we need a View to modify.
6372  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6373  constexpr size_t ONE = 1;
6374 
6375  if (debug) {
6376  std::ostringstream os;
6377  os << *prefix << "Compute totalNumPackets" << endl;
6378  std::cerr << os.str ();
6379  }
6380 
6381  Kokkos::parallel_reduce
6382  ("Tpetra::CrsGraph::pack: totalNumPackets",
6383  inputRange,
6384  [=] (const LO i, size_t& curTotalNumPackets) {
6385  const LO lclRow = exportLIDs_h[i];
6386  const GO gblRow = rowMap.getGlobalElement (lclRow);
6387  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6388  if (debug) {
6389  std::ostringstream os;
6390  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6391  << lclRow << " not in row Map on this process" << endl;
6392  std::cerr << os.str ();
6393  }
6394  Kokkos::atomic_add (&errCountView(), ONE);
6395  numPacketsPerLID_h(i) = 0;
6396  }
6397  else {
6398  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6399  numPacketsPerLID_h(i) = numEnt;
6400  curTotalNumPackets += numEnt;
6401  }
6402  },
6403  totalNumPackets);
6404 
6405  if (debug) {
6406  std::ostringstream os;
6407  os << *prefix << "totalNumPackets: " << totalNumPackets
6408  << ", errCount: " << errCount << endl;
6409  std::cerr << os.str ();
6410  }
6411  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6412  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6413  "one or more errors! totalNumPackets: " << totalNumPackets
6414  << ", errCount: " << errCount << ".");
6415 
6416  // Allocate space for all the column indices to pack.
6417  if (static_cast<size_t> (exports.extent (0)) < totalNumPackets) {
6418  // FIXME (mfh 09 Apr 2019) Create without initializing.
6419  exports = exports_dv_type ("exports", totalNumPackets);
6420  }
6421 
6422  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6423  (! this->supportsRowViews (), std::logic_error,
6424  "this->supportsRowViews() returns false; this should never happen. "
6425  "Please report this bug to the Tpetra developers.");
6426 
6427  // Loop again over the rows to export, and pack rows of indices
6428  // into the output buffer.
6429 
6430  if (debug) {
6431  std::ostringstream os;
6432  os << *prefix << "Pack into exports buffer" << endl;
6433  std::cerr << os.str ();
6434  }
6435 
6436  exports.clear_sync_state ();
6437  exports.modify_host ();
6438  auto exports_h = exports.view_host ();
6439 
6440  // The graph may store its data in UVM memory, so make sure that
6441  // any device kernels are done modifying the graph's data before
6442  // reading the data.
6443  device_execution_space::fence ();
6444 
6445  errCount = 0;
6446  Kokkos::parallel_scan
6447  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6448  inputRange,
6449  [=] (const LO i, size_t& exportsOffset, const bool final) {
6450  const size_t curOffset = exportsOffset;
6451  const LO lclRow = exportLIDs_h(i);
6452  const GO gblRow = rowMap.getGlobalElement (lclRow);
6453  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6454  if (debug) {
6455  std::ostringstream os;
6456  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6457  << lclRow << " not in row Map on this process" << endl;
6458  std::cerr << os.str ();
6459  }
6460  Kokkos::atomic_add (&errCountView(), ONE);
6461  return;
6462  }
6463 
6464  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6465  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6466  if (debug) {
6467  std::ostringstream os;
6468  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6469  << lclRow << ", gblRow=" << gblRow << ": invalid rowInfo"
6470  << endl;
6471  std::cerr << os.str ();
6472  }
6473  Kokkos::atomic_add (&errCountView(), ONE);
6474  return;
6475  }
6476 
6477  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6478  if (debug) {
6479  std::ostringstream os;
6480  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6481  << lclRow << ", gblRow=" << gblRow << ", curOffset "
6482  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6483  << ") > totalNumPackets (= " << totalNumPackets << ")."
6484  << endl;
6485  std::cerr << os.str ();
6486  }
6487  Kokkos::atomic_add (&errCountView(), ONE);
6488  return;
6489  }
6490 
6491  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6492  if (this->isLocallyIndexed ()) {
6493  const LO* lclColInds = nullptr;
6494  LO capacity = 0;
6495  const LO errCode =
6496  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6497  if (errCode == 0) {
6498  if (final) {
6499  for (LO k = 0; k < numEnt; ++k) {
6500  const LO lclColInd = lclColInds[k];
6501  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6502  // Pack it, even if it's wrong. Let the receiving
6503  // process deal with it. Otherwise, we'll miss out
6504  // on any correct data.
6505  exports_h(curOffset + k) = gblColInd;
6506  } // for each entry in the row
6507  } // final pass?
6508  exportsOffset = curOffset + numEnt;
6509  }
6510  else { // error in getting local row view
6511  if (debug) {
6512  std::ostringstream os;
6513  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6514  << lclRow << ", gblRow=" << gblRow << ": "
6515  "getLocalViewRawConst returned nonzero error code "
6516  << errCode << endl;
6517  std::cerr << os.str ();
6518  }
6519  Kokkos::atomic_add (&errCountView(), ONE);
6520  }
6521  }
6522  else if (this->isGloballyIndexed ()) {
6523  const GO* gblColInds = nullptr;
6524  LO capacity = 0;
6525  const LO errCode =
6526  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6527  if (errCode == 0) {
6528  if (final) {
6529  for (LO k = 0; k < numEnt; ++k) {
6530  const GO gblColInd = gblColInds[k];
6531  // Pack it, even if it's wrong. Let the receiving
6532  // process deal with it. Otherwise, we'll miss out
6533  // on any correct data.
6534  exports_h(curOffset + k) = gblColInd;
6535  } // for each entry in the row
6536  } // final pass?
6537  exportsOffset = curOffset + numEnt;
6538  }
6539  else { // error in getting global row view
6540  if (debug) {
6541  std::ostringstream os;
6542  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6543  << lclRow << ", gblRow=" << gblRow << ": "
6544  "getGlobalViewRawConst returned nonzero error code "
6545  << errCode << endl;
6546  std::cerr << os.str ();
6547  }
6548  Kokkos::atomic_add (&errCountView(), ONE);
6549  }
6550  }
6551  // If neither globally nor locally indexed, then the graph
6552  // has no entries in this row (or indeed, in any row on this
6553  // process) to pack.
6554  });
6555 
6556  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6557  // (errCount != 0, std::logic_error, "Packing encountered "
6558  // "one or more errors! errCount = " << errCount
6559  // << ", totalNumPackets = " << totalNumPackets << ".");
6560 
6561  if (debug) {
6562  std::ostringstream os;
6563  os << *prefix << "errCount = " << errCount << "; Done" << endl;
6564  std::cerr << os.str ();
6565  }
6566  }
6567 
6568  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6569  void
6570  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6571 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6572  unpackAndCombineNew
6573 #else // TPETRA_ENABLE_DEPRECATED_CODE
6574  unpackAndCombine
6575 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6576  (const Kokkos::DualView<const local_ordinal_type*,
6577  buffer_device_type>& importLIDs,
6578  Kokkos::DualView<packet_type*,
6579  buffer_device_type> imports,
6580  Kokkos::DualView<size_t*,
6581  buffer_device_type> numPacketsPerLID,
6582  const size_t /* constantNumPackets */,
6583  Distributor& /* distor */,
6584  const CombineMode /* combineMode */ )
6585  {
6586  using std::endl;
6587  using LO = local_ordinal_type;
6588  using GO = global_ordinal_type;
6589  const char tfecfFuncName[] = "unpackAndCombine: ";
6590  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6591 
6592  std::unique_ptr<std::string> prefix;
6593  if (debug) {
6594  std::ostringstream os;
6595  const int myRank = this->getMap ()->getComm ()->getRank ();
6596  os << "Proc " << myRank << ": Tpetra::CrsGraph::unpackAndCombine: ";
6597  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6598  os << endl;
6599  std::cerr << os.str ();
6600  }
6601 
6602  if (this->getProfileType () == StaticProfile) {
6603  auto padding = computeCrsPaddingNew (importLIDs, numPacketsPerLID);
6604  applyCrsPadding (padding);
6605  }
6606  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6607  // reasonable meaning, whether or not the matrix is fill complete.
6608  // It's just more work to implement.
6609 
6610  // We are not checking the value of the CombineMode input
6611  // argument. For CrsGraph, we only support import/export
6612  // operations if fillComplete has not yet been called. Any
6613  // incoming column-indices are inserted into the target graph. In
6614  // this context, CombineMode values of ADD vs INSERT are
6615  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6616  // duplicate column-index is inserted, it will be compressed out
6617  // when fillComplete is called.
6618  //
6619  // Note: I think REPLACE means that an existing row is replaced by
6620  // the imported row, i.e., the existing indices are cleared. CGB,
6621  // 6/17/2010
6622 
6623  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6624  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6625  std::runtime_error, "importLIDs.extent(0) = "
6626  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6627  << numPacketsPerLID.extent (0) << ".");
6628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6629  (isFillComplete (), std::runtime_error,
6630  "Import or Export operations are not allowed on the destination "
6631  "CrsGraph if it is fill complete.");
6632 
6633  const size_t numImportLIDs = static_cast<size_t> (importLIDs.extent (0));
6634  if (numPacketsPerLID.need_sync_host ()) {
6635  numPacketsPerLID.sync_host ();
6636  }
6637  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6638 
6639  // If we're inserting in local indices, let's pre-allocate
6640  Teuchos::Array<LO> lclColInds;
6641  if (this->isLocallyIndexed ()) {
6642  size_t maxNumInserts = 0;
6643  for (size_t i = 0; i < numImportLIDs; ++i) {
6644  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6645  }
6646  lclColInds.resize (maxNumInserts);
6647  }
6648 
6649  auto importLIDs_h = importLIDs.view_host ();
6650  if (imports.need_sync_host ()) {
6651  imports.sync_host ();
6652  }
6653  auto imports_h = imports.view_host ();
6654 
6655  const map_type& rowMap = * (this->rowMap_);
6656  size_t importsOffset = 0;
6657  for (size_t i = 0; i < numImportLIDs; ++i) {
6658  const LO lclRow = importLIDs_h[i];
6659  const GO gblRow = rowMap.getGlobalElement (lclRow);
6660  const LO numEnt = numPacketsPerLID_h[i];
6661  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6662  &imports_h[importsOffset];
6663  if (! this->isLocallyIndexed ()) {
6664  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6665  // This row is not in the row Map on the calling process.
6666  this->insertGlobalIndicesIntoNonownedRows (gblRow, gblColInds, numEnt);
6667  }
6668  else {
6669  this->insertGlobalIndicesFiltered (lclRow, gblColInds, numEnt);
6670  }
6671  }
6672  else {
6673  for (LO j = 0; j < numEnt; j++) {
6674  lclColInds[j] = this->colMap_->getLocalElement (gblColInds[j]);
6675  }
6676  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6677  (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid (),
6678  std::runtime_error,
6679  "cannot insert into unowned rows if isLocallyIndexed().");
6680  this->insertLocalIndices (lclRow, numEnt, lclColInds.data ());
6681  }
6682  importsOffset += numEnt;
6683  }
6684 
6685 
6686  if (debug) {
6687  std::ostringstream os;
6688  os << *prefix << "Done" << endl;
6689  std::cerr << os.str ();
6690  }
6691  }
6692 
6693  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6694  void
6696  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6697  {
6698  using Teuchos::Comm;
6699  using Teuchos::null;
6700  using Teuchos::ParameterList;
6701  using Teuchos::RCP;
6702 
6703  // We'll set all the state "transactionally," so that this method
6704  // satisfies the strong exception guarantee. This object's state
6705  // won't be modified until the end of this method.
6706  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6707  RCP<import_type> importer;
6708  RCP<export_type> exporter;
6709 
6710  rowMap = newMap;
6711  RCP<const Comm<int> > newComm =
6712  (newMap.is_null ()) ? null : newMap->getComm ();
6713 
6714  if (! domainMap_.is_null ()) {
6715  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6716  // Common case: original domain and row Maps are identical.
6717  // In that case, we need only replace the original domain Map
6718  // with the new Map. This ensures that the new domain and row
6719  // Maps _stay_ identical.
6720  domainMap = newMap;
6721  } else {
6722  domainMap = domainMap_->replaceCommWithSubset (newComm);
6723  }
6724  }
6725  if (! rangeMap_.is_null ()) {
6726  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6727  // Common case: original range and row Maps are identical. In
6728  // that case, we need only replace the original range Map with
6729  // the new Map. This ensures that the new range and row Maps
6730  // _stay_ identical.
6731  rangeMap = newMap;
6732  } else {
6733  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6734  }
6735  }
6736  if (! colMap.is_null ()) {
6737  colMap = colMap_->replaceCommWithSubset (newComm);
6738  }
6739 
6740  // (Re)create the Export and / or Import if necessary.
6741  if (! newComm.is_null ()) {
6742  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6743  //
6744  // The operations below are collective on the new communicator.
6745  //
6746  // (Re)create the Export object if necessary. If I haven't
6747  // called fillComplete yet, I don't have a rangeMap, so I must
6748  // first check if the _original_ rangeMap is not null. Ditto
6749  // for the Import object and the domain Map.
6750  if (! rangeMap_.is_null () &&
6751  rangeMap != rowMap &&
6752  ! rangeMap->isSameAs (*rowMap)) {
6753  if (params.is_null () || ! params->isSublist ("Export")) {
6754  exporter = rcp (new export_type (rowMap, rangeMap));
6755  }
6756  else {
6757  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6758  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6759  }
6760  }
6761  // (Re)create the Import object if necessary.
6762  if (! domainMap_.is_null () &&
6763  domainMap != colMap &&
6764  ! domainMap->isSameAs (*colMap)) {
6765  if (params.is_null () || ! params->isSublist ("Import")) {
6766  importer = rcp (new import_type (domainMap, colMap));
6767  } else {
6768  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6769  importer = rcp (new import_type (domainMap, colMap, importSublist));
6770  }
6771  }
6772  } // if newComm is not null
6773 
6774  // Defer side effects until the end. If no destructors throw
6775  // exceptions (they shouldn't anyway), then this method satisfies
6776  // the strong exception guarantee.
6777  exporter_ = exporter;
6778  importer_ = importer;
6779  rowMap_ = rowMap;
6780  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6781  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6782  // the same object. We might want to get rid of this redundant
6783  // pointer sometime, but for now, we'll leave it alone and just
6784  // set map_ to the same object.
6785  this->map_ = rowMap;
6786  domainMap_ = domainMap;
6787  rangeMap_ = rangeMap;
6788  colMap_ = colMap;
6789  }
6790 
6791  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6792  void
6794  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6795  {
6796  typedef LocalOrdinal LO;
6797  typedef GlobalOrdinal GO;
6798  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6799  const bool debug = ::Tpetra::Details::Behavior::debug ();
6800 
6801  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6802  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6803  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6804  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6805  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6806  std::invalid_argument, "offsets.extent(0) = " <<
6807  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6808 
6809  const map_type& rowMap = * (this->getRowMap ());
6810  const map_type& colMap = * (this->getColMap ());
6811 
6812  // We only use these in debug mode, but since debug mode is a
6813  // run-time option, they need to exist here. That's why we create
6814  // the vector with explicit size zero, to avoid overhead if debug
6815  // mode is off.
6816  bool allRowMapDiagEntriesInColMap = true;
6817  bool allDiagEntriesFound = true;
6818  bool allOffsetsCorrect = true;
6819  bool noOtherWeirdness = true;
6820  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6821  wrong_offsets_type wrongOffsets (0);
6822 
6823  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6824  // the subset of Map functionality that we need below.
6825  auto lclRowMap = rowMap.getLocalMap ();
6826  auto lclColMap = colMap.getLocalMap ();
6827 
6828  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6829  // setup, at least on the host. For CUDA, we have to use LocalMap
6830  // (that comes from each of the two Maps).
6831 
6832  const bool sorted = this->isSorted ();
6833  if (isFillComplete ()) {
6834  auto lclGraph = this->getLocalGraph ();
6835  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6836  lclGraph.row_map,
6837  lclGraph.entries, sorted);
6838  }
6839  else {
6840  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6841  // since the graph is not fill complete. The previous version
6842  // of this code assumed UVM; this version does not.
6843  auto offsets_h = Kokkos::create_mirror_view (offsets);
6844 
6845  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6846  // Find the diagonal entry. Since the row Map and column Map
6847  // may differ, we have to compare global row and column
6848  // indices, not local.
6849  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6850  const GO gblColInd = gblRowInd;
6851  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6852 
6853  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6854  allRowMapDiagEntriesInColMap = false;
6855  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6856  }
6857  else {
6858  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6859  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6860  rowInfo.numEntries > 0) {
6861 
6862  auto colInds = this->getLocalKokkosRowView (rowInfo);
6863  const size_t hint = 0; // not needed for this algorithm
6864  const size_t offset =
6865  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6866  lclColInd, hint, sorted);
6867  offsets_h(lclRowInd) = offset;
6868 
6869  if (debug) {
6870  // Now that we have what we think is an offset, make sure
6871  // that it really does point to the diagonal entry. Offsets
6872  // are _relative_ to each row, not absolute (for the whole
6873  // (local) graph).
6874  Teuchos::ArrayView<const LO> lclColInds;
6875  try {
6876  this->getLocalRowView (lclRowInd, lclColInds);
6877  }
6878  catch (...) {
6879  noOtherWeirdness = false;
6880  }
6881  // Don't continue with error checking if the above failed.
6882  if (noOtherWeirdness) {
6883  const size_t numEnt = lclColInds.size ();
6884  if (offset >= numEnt) {
6885  // Offsets are relative to each row, so this means that
6886  // the offset is out of bounds.
6887  allOffsetsCorrect = false;
6888  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6889  } else {
6890  const LO actualLclColInd = lclColInds[offset];
6891  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6892  if (actualGblColInd != gblColInd) {
6893  allOffsetsCorrect = false;
6894  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6895  }
6896  }
6897  }
6898  } // debug
6899  }
6900  else { // either row is empty, or something went wrong w/ getRowInfo()
6901  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6902  allDiagEntriesFound = false;
6903  }
6904  } // whether lclColInd is a valid local column index
6905  } // for each local row
6906 
6907  Kokkos::deep_copy (offsets, offsets_h);
6908  } // whether the graph is fill complete
6909 
6910  if (debug) {
6911  if (wrongOffsets.size () != 0) {
6912  std::ostringstream os;
6913  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
6914  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6915  os << "(" << wrongOffsets[k].first << ","
6916  << wrongOffsets[k].second << ")";
6917  if (k + 1 < wrongOffsets.size ()) {
6918  os << ", ";
6919  }
6920  }
6921  os << "]" << std::endl;
6922  std::cerr << os.str ();
6923  }
6924  } // debug
6925 
6926  if (debug) {
6927  using Teuchos::reduceAll;
6928  using std::endl;
6929  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6930  const bool localSuccess =
6931  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6932  const int numResults = 5;
6933  int lclResults[5];
6934  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6935  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6936  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6937  lclResults[3] = noOtherWeirdness ? 1 : 0;
6938  // min-all-reduce will compute least rank of all the processes
6939  // that didn't succeed.
6940  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6941 
6942  int gblResults[5];
6943  gblResults[0] = 0;
6944  gblResults[1] = 0;
6945  gblResults[2] = 0;
6946  gblResults[3] = 0;
6947  gblResults[4] = 0;
6948  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6949  numResults, lclResults, gblResults);
6950 
6951  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6952  || gblResults[3] != 1) {
6953  std::ostringstream os; // build error message
6954  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6955  "possibly among others): " << endl;
6956  if (gblResults[0] == 0) {
6957  os << " - The column Map does not contain at least one diagonal entry "
6958  "of the graph." << endl;
6959  }
6960  if (gblResults[1] == 0) {
6961  os << " - On one or more processes, some row does not contain a "
6962  "diagonal entry." << endl;
6963  }
6964  if (gblResults[2] == 0) {
6965  os << " - On one or more processes, some offsets are incorrect."
6966  << endl;
6967  }
6968  if (gblResults[3] == 0) {
6969  os << " - One or more processes had some other error."
6970  << endl;
6971  }
6972  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6973  }
6974  } // debug
6975  }
6976 
6977  namespace { // (anonymous)
6978 
6979  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6980  // below). The point is to avoid the deep copy between the input
6981  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6982  // can't use UVM to avoid the deep copy with CUDA, because the
6983  // ArrayRCP is a host pointer, while the input to the graph's
6984  // getLocalDiagOffsets method is a device pointer. Assigning a
6985  // host pointer to a device pointer is incorrect unless the host
6986  // pointer points to host pinned memory. The goal is to get rid
6987  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6988  // copy for backwards compatibility.
6989  //
6990  // We have to use template magic because
6991  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6992  // if device_type::memory_space is not Kokkos::HostSpace (as is
6993  // the case with CUDA).
6994 
6995  template<class DeviceType,
6996  const bool memSpaceIsHostSpace =
6997  std::is_same<typename DeviceType::memory_space,
6998  Kokkos::HostSpace>::value>
6999  struct HelpGetLocalDiagOffsets {};
7000 
7001  template<class DeviceType>
7002  struct HelpGetLocalDiagOffsets<DeviceType, true> {
7003  typedef DeviceType device_type;
7004  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7005  Kokkos::MemoryUnmanaged> device_offsets_type;
7006  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7007  Kokkos::MemoryUnmanaged> host_offsets_type;
7008 
7009  static device_offsets_type
7010  getDeviceOffsets (const host_offsets_type& hostOffsets)
7011  {
7012  // Host and device are the same; no need to allocate a
7013  // temporary device View.
7014  return hostOffsets;
7015  }
7016 
7017  static void
7018  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
7019  const device_offsets_type& /* deviceOffsets */)
7020  { /* copy back not needed; host and device are the same */ }
7021  };
7022 
7023  template<class DeviceType>
7024  struct HelpGetLocalDiagOffsets<DeviceType, false> {
7025  typedef DeviceType device_type;
7026  // We have to do a deep copy, since host memory space != device
7027  // memory space. Thus, the device View is managed (we need to
7028  // allocate a temporary device View).
7029  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
7030  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7031  Kokkos::MemoryUnmanaged> host_offsets_type;
7032 
7033  static device_offsets_type
7034  getDeviceOffsets (const host_offsets_type& hostOffsets)
7035  {
7036  // Host memory space != device memory space, so we must
7037  // allocate a temporary device View for the graph.
7038  return device_offsets_type ("offsets", hostOffsets.extent (0));
7039  }
7040 
7041  static void
7042  copyBackIfNeeded (const host_offsets_type& hostOffsets,
7043  const device_offsets_type& deviceOffsets)
7044  {
7045  Kokkos::deep_copy (hostOffsets, deviceOffsets);
7046  }
7047  };
7048  } // namespace (anonymous)
7049 
7050 
7051  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7052  void
7054  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
7055  {
7056  typedef LocalOrdinal LO;
7057  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
7058  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7059  (! this->hasColMap (), std::runtime_error,
7060  "The graph does not yet have a column Map.");
7061  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
7062  if (static_cast<LO> (offsets.size ()) != myNumRows) {
7063  // NOTE (mfh 21 Jan 2016) This means that the method does not
7064  // satisfy the strong exception guarantee (no side effects
7065  // unless successful).
7066  offsets.resize (myNumRows);
7067  }
7068 
7069  // mfh 21 Jan 2016: This method unfortunately takes a
7070  // Teuchos::ArrayRCP, which is host memory. The graph wants a
7071  // device pointer. We can't access host memory from the device;
7072  // that's the wrong direction for UVM. (It's the right direction
7073  // for inefficient host pinned memory, but we don't want to use
7074  // that here.) Thus, if device memory space != host memory space,
7075  // we allocate and use a temporary device View to get the offsets.
7076  // If the two spaces are equal, the template magic makes the deep
7077  // copy go away.
7078  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
7079  typedef typename helper_type::host_offsets_type host_offsets_type;
7080  // Unmanaged host View that views the output array.
7081  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
7082  // Allocate temp device View if host != device, else reuse host array.
7083  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
7084  // NOT recursion; this calls the overload that takes a device View.
7085  this->getLocalDiagOffsets (deviceOffsets);
7086  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
7087  }
7088 
// NOTE(review): the listing numbers jump from 7090 to 7093 here, so
// the class-qualifier and function-name lines of this definition are
// missing from this listing.  Presumably this is
// CrsGraph::supportsRowViews() (it is called as
// this->supportsRowViews() elsewhere in this file) -- confirm against
// the original header.  What is visible: a member function template
// definition with return type bool that unconditionally returns true.
7089  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7090  bool
7093  return true;
7094  }
7095 
7096  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7097  void
7100  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7101  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7102  const Teuchos::RCP<const map_type>& domainMap,
7103  const Teuchos::RCP<const map_type>& rangeMap,
7104  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7105  {
7110  using Teuchos::ArrayRCP;
7111  using Teuchos::ArrayView;
7112  using Teuchos::Comm;
7113  using Teuchos::ParameterList;
7114  using Teuchos::rcp;
7115  using Teuchos::RCP;
7116 #ifdef HAVE_TPETRA_MMM_TIMINGS
7117  using std::string;
7118  using Teuchos::TimeMonitor;
7119 #endif
7120 
7121  using LO = LocalOrdinal;
7122  using GO = GlobalOrdinal;
7123  using NT = node_type;
7124  using this_type = CrsGraph<LO, GO, NT>;
7125  using ivector_type = Vector<int, LO, GO, NT>;
7126  using packet_type = typename this_type::packet_type;
7127 
7128  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
7129 
7130 #ifdef HAVE_TPETRA_MMM_TIMINGS
7131  string label;
7132  if(!params.is_null()) label = params->get("Timer Label", label);
7133  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
7134  RCP<TimeMonitor> MM =
7135  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
7136 #endif
7137 
7138  // Make sure that the input argument rowTransfer is either an
7139  // Import or an Export. Import and Export are the only two
7140  // subclasses of Transfer that we defined, but users might
7141  // (unwisely, for now at least) decide to implement their own
7142  // subclasses. Exclude this possibility.
7143  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
7144  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
7145  TEUCHOS_TEST_FOR_EXCEPTION(
7146  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
7147  prefix << "The 'rowTransfer' input argument must be either an Import or "
7148  "an Export, and its template parameters must match the corresponding "
7149  "template parameters of the CrsGraph.");
7150 
7151  // Make sure that the input argument domainTransfer is either an
7152  // Import or an Export. Import and Export are the only two
7153  // subclasses of Transfer that we defined, but users might
7154  // (unwisely, for now at least) decide to implement their own
7155  // subclasses. Exclude this possibility.
7156  Teuchos::RCP<const import_type> xferDomainAsImport =
7157  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7158  Teuchos::RCP<const export_type> xferDomainAsExport =
7159  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7160 
7161  if(! domainTransfer.is_null()) {
7162 
7163  TEUCHOS_TEST_FOR_EXCEPTION(
7164  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7165  prefix << "The 'domainTransfer' input argument must be either an "
7166  "Import or an Export, and its template parameters must match the "
7167  "corresponding template parameters of the CrsGraph.");
7168 
7169  TEUCHOS_TEST_FOR_EXCEPTION(
7170  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
7171  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
7172  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7173  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7174  "must be of the same type (either Import or Export).");
7175 
7176  TEUCHOS_TEST_FOR_EXCEPTION(
7177  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
7178  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
7179  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7180  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7181  "must be of the same type (either Import or Export).");
7182 
7183  } // domainTransfer != null
7184 
7185 
7186  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
7187  // if the source Map is not distributed but the target Map is?
7188  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7189 
7190  //
7191  // Get the caller's parameters
7192  //
7193 
7194  bool reverseMode = false; // Are we in reverse mode?
7195  bool restrictComm = false; // Do we need to restrict the communicator?
7196  RCP<ParameterList> graphparams; // parameters for the destination graph
7197  if (! params.is_null()) {
7198  reverseMode = params->get("Reverse Mode", reverseMode);
7199  restrictComm = params->get("Restrict Communicator", restrictComm);
7200  graphparams = sublist(params, "CrsGraph");
7201  }
7202 
7203  // Get the new domain and range Maps. We need some of them for error
7204  // checking, now that we have the reverseMode parameter.
7205  RCP<const map_type> MyRowMap = reverseMode ?
7206  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7207  RCP<const map_type> MyColMap; // create this below
7208  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
7209  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
7210  RCP<const map_type> BaseRowMap = MyRowMap;
7211  RCP<const map_type> BaseDomainMap = MyDomainMap;
7212 
7213  // If the user gave us a nonnull destGraph, then check whether it's
7214  // "pristine." That means that it has no entries.
7215  //
7216  // FIXME (mfh 15 May 2014) If this is not true on all processes,
7217  // then this exception test may hang. It would be better to
7218  // forward an error flag to the next communication phase.
7219  if (! destGraph.is_null()) {
7220  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
7221  // whether a graph or matrix has no entries on the calling
7222  // process, is that it is neither locally nor globally indexed.
7223  // This may change eventually with the Kokkos refactor version
7224  // of Tpetra, so it would be better just to check the quantity
7225  // of interest directly. Note that with the Kokkos refactor
7226  // version of Tpetra, asking for the total number of entries in
7227  // a graph or matrix that is not fill complete might require
7228  // computation (kernel launch), since it is not thread scalable
7229  // to update a count every time an entry is inserted.
7230  const bool NewFlag =
7231  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7232  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7233  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
7234  "if its graph is empty (neither locally nor globally indexed).");
7235 
7236  // FIXME (mfh 15 May 2014) At some point, we want to change
7237  // graphs and matrices so that their DistObject Map
7238  // (this->getMap()) may differ from their row Map. This will
7239  // make redistribution for 2-D distributions more efficient. I
7240  // hesitate to change this check, because I'm not sure how much
7241  // the code here depends on getMap() and getRowMap() being the
7242  // same.
7243  TEUCHOS_TEST_FOR_EXCEPTION(
7244  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7245  prefix << "The (row) Map of the input argument 'destGraph' is not the "
7246  "same as the (row) Map specified by the input argument 'rowTransfer'.");
7247 
7248  TEUCHOS_TEST_FOR_EXCEPTION(
7249  ! destGraph->checkSizes(*this), std::invalid_argument,
7250  prefix << "You provided a nonnull destination graph, but checkSizes() "
7251  "indicates that it is not a legal legal target for redistribution from "
7252  "the source graph (*this). This may mean that they do not have the "
7253  "same dimensions.");
7254  }
7255 
7256  // If forward mode (the default), then *this's (row) Map must be
7257  // the same as the source Map of the Transfer. If reverse mode,
7258  // then *this's (row) Map must be the same as the target Map of
7259  // the Transfer.
7260  //
7261  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7262  // and matrices so that their DistObject Map (this->getMap()) may
7263  // differ from their row Map. This will make redistribution for
7264  // 2-D distributions more efficient. I hesitate to change this
7265  // check, because I'm not sure how much the code here depends on
7266  // getMap() and getRowMap() being the same.
7267  TEUCHOS_TEST_FOR_EXCEPTION(
7268  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7269  std::invalid_argument, prefix <<
7270  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7271 
7272  TEUCHOS_TEST_FOR_EXCEPTION(
7273  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7274  std::invalid_argument, prefix <<
7275  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7276 
7277  // checks for domainTransfer
7278  TEUCHOS_TEST_FOR_EXCEPTION(
7279  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7280  std::invalid_argument,
7281  prefix << "The target map of the 'domainTransfer' input argument must be "
7282  "the same as the rebalanced domain map 'domainMap'");
7283 
7284  TEUCHOS_TEST_FOR_EXCEPTION(
7285  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7286  std::invalid_argument,
7287  prefix << "The source map of the 'domainTransfer' input argument must be "
7288  "the same as the rebalanced domain map 'domainMap'");
7289 
7290  // The basic algorithm here is:
7291  //
7292  // 1. Call the moral equivalent of "distor.do" to handle the import.
7293  // 2. Copy all the Imported and Copy/Permuted data into the raw
7294  // CrsGraph pointers, still using GIDs.
7295  // 3. Call an optimized version of MakeColMap that avoids the
7296  // Directory lookups (since the importer knows who owns all the
7297  // GIDs) AND reindexes to LIDs.
7298  // 4. Call expertStaticFillComplete()
7299 
7300  // Get information from the Importer
7301  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7302  ArrayView<const LO> ExportLIDs = reverseMode ?
7303  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7304  ArrayView<const LO> RemoteLIDs = reverseMode ?
7305  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7306  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7307  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7308  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7309  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7310  Distributor& Distor = rowTransfer.getDistributor();
7311 
7312  // Owning PIDs
7313  Teuchos::Array<int> SourcePids;
7314  Teuchos::Array<int> TargetPids;
7315  int MyPID = getComm()->getRank();
7316 
7317  // Temp variables for sub-communicators
7318  RCP<const map_type> ReducedRowMap, ReducedColMap,
7319  ReducedDomainMap, ReducedRangeMap;
7320  RCP<const Comm<int> > ReducedComm;
7321 
7322  // If the user gave us a null destGraph, then construct the new
7323  // destination graph. We will replace its column Map later.
7324  if (destGraph.is_null()) {
7325  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7326  }
7327 
7328  /***************************************************/
7329  /***** 1) First communicator restriction phase ****/
7330  /***************************************************/
7331  if (restrictComm) {
7332  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7333  ReducedComm = ReducedRowMap.is_null() ?
7334  Teuchos::null :
7335  ReducedRowMap->getComm();
7336  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7337 
7338  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7339  ReducedRowMap :
7340  MyDomainMap->replaceCommWithSubset(ReducedComm);
7341  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7342  ReducedRowMap :
7343  MyRangeMap->replaceCommWithSubset(ReducedComm);
7344 
7345  // Reset the "my" maps
7346  MyRowMap = ReducedRowMap;
7347  MyDomainMap = ReducedDomainMap;
7348  MyRangeMap = ReducedRangeMap;
7349 
7350  // Update my PID, if we've restricted the communicator
7351  if (! ReducedComm.is_null()) {
7352  MyPID = ReducedComm->getRank();
7353  }
7354  else {
7355  MyPID = -2; // For debugging
7356  }
7357  }
7358  else {
7359  ReducedComm = MyRowMap->getComm();
7360  }
7361 
7362  /***************************************************/
7363  /***** 2) From Tpetra::DistObject::doTransfer() ****/
7364  /***************************************************/
7365 #ifdef HAVE_TPETRA_MMM_TIMINGS
7366  MM = Teuchos::null;
7367  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7368 #endif
7369  // Get the owning PIDs
7370  RCP<const import_type> MyImporter = getImporter();
7371 
7372  // check whether the domain map of the source graph and the base domain map are the same
7373  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7374 
7375  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7376  // Same domain map as source graph
7377  //
7378  // NOTE: This won't work for restrictComm (because the Import
7379  // doesn't know the restricted PIDs), though writing an
7380  // optimized version for that case would be easy (Import an
7381  // IntVector of the new PIDs). Might want to add this later.
7382  Import_Util::getPids(*MyImporter, SourcePids, false);
7383  }
7384  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7385  // Same domain map as source graph (restricted communicator)
7386  // We need one import from the domain to the column map
7387  ivector_type SourceDomain_pids(getDomainMap(),true);
7388  ivector_type SourceCol_pids(getColMap());
7389  // SourceDomain_pids contains the restricted pids
7390  SourceDomain_pids.putScalar(MyPID);
7391 
7392  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7393  SourcePids.resize(getColMap()->getNodeNumElements());
7394  SourceCol_pids.get1dCopy(SourcePids());
7395  }
7396  else if (MyImporter.is_null() && bSameDomainMap) {
7397  // Graph has no off-process entries
7398  SourcePids.resize(getColMap()->getNodeNumElements());
7399  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7400  }
7401  else if ( ! MyImporter.is_null() &&
7402  ! domainTransfer.is_null() ) {
7403  // general implementation for rectangular matrices with
7404  // domain map different than SourceGraph domain map.
7405  // User has to provide a DomainTransfer object. We need
7406  // two communications (import/export)
7407 
7408  // TargetDomain_pids lives on the rebalanced new domain map
7409  ivector_type TargetDomain_pids(domainMap);
7410  TargetDomain_pids.putScalar(MyPID);
7411 
7412  // SourceDomain_pids lives on the non-rebalanced old domain map
7413  ivector_type SourceDomain_pids(getDomainMap());
7414 
7415  // SourceCol_pids lives on the non-rebalanced old column map
7416  ivector_type SourceCol_pids(getColMap());
7417 
7418  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7419  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7420  }
7421  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7422  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7423  }
7424  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7425  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7426  }
7427  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7428  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7429  }
7430  else {
7431  TEUCHOS_TEST_FOR_EXCEPTION(
7432  true, std::logic_error,
7433  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7434  }
7435  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7436  SourcePids.resize(getColMap()->getNodeNumElements());
7437  SourceCol_pids.get1dCopy(SourcePids());
7438  }
7439  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7440  getDomainMap()->isSameAs(*getRowMap())) {
7441  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7442  ivector_type TargetRow_pids(domainMap);
7443  ivector_type SourceRow_pids(getRowMap());
7444  ivector_type SourceCol_pids(getColMap());
7445 
7446  TargetRow_pids.putScalar(MyPID);
7447  if (! reverseMode && xferAsImport != nullptr) {
7448  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7449  }
7450  else if (reverseMode && xferAsExport != nullptr) {
7451  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7452  }
7453  else if (! reverseMode && xferAsExport != nullptr) {
7454  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7455  }
7456  else if (reverseMode && xferAsImport != nullptr) {
7457  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7458  }
7459  else {
7460  TEUCHOS_TEST_FOR_EXCEPTION(
7461  true, std::logic_error,
7462  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7463  }
7464  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7465  SourcePids.resize(getColMap()->getNodeNumElements());
7466  SourceCol_pids.get1dCopy(SourcePids());
7467  }
7468  else {
7469  TEUCHOS_TEST_FOR_EXCEPTION(
7470  true, std::invalid_argument,
7471  prefix << "This method only allows either domainMap == getDomainMap(), "
7472  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7473  }
7474 
7475  // Tpetra-specific stuff
7476  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7477  if (constantNumPackets == 0) {
7478  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7479  RemoteLIDs.size());
7480  }
7481  else {
7482  // There are a constant number of packets per element. We
7483  // already know (from the number of "remote" (incoming)
7484  // elements) how many incoming elements we expect, so we can
7485  // resize the buffer accordingly.
7486  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7487  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7488  }
7489 
7490  {
7491  // packAndPrepare* methods modify numExportPacketsPerLID_.
7492  destGraph->numExportPacketsPerLID_.modify_host();
7493  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7494  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7495 
7496  // Pack & Prepare w/ owning PIDs
7497  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7498  numExportPacketsPerLID, ExportLIDs,
7499  SourcePids, constantNumPackets, Distor);
7500  }
7501 
7502  // Do the exchange of remote data.
7503 #ifdef HAVE_TPETRA_MMM_TIMINGS
7504  MM = Teuchos::null;
7505  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7506 #endif
7507 
7508  if (communication_needed) {
7509  if (reverseMode) {
7510  if (constantNumPackets == 0) { // variable number of packets per LID
7511  // Make sure that host has the latest version, since we're
7512  // using the version on host. If host has the latest
7513  // version, syncing to host does nothing.
7514  destGraph->numExportPacketsPerLID_.sync_host();
7515  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7516  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7517  destGraph->numImportPacketsPerLID_.sync_host();
7518  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7519  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7520  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7521  numImportPacketsPerLID);
7522  size_t totalImportPackets = 0;
7523  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7524  totalImportPackets += numImportPacketsPerLID[i];
7525  }
7526 
7527  // Reallocation MUST go before setting the modified flag,
7528  // because it may clear out the flags.
7529  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7530  destGraph->imports_.modify_host();
7531  Teuchos::ArrayView<packet_type> hostImports =
7532  getArrayViewFromDualView(destGraph->imports_);
7533  // This is a legacy host pack/unpack path, so use the host
7534  // version of exports_.
7535  destGraph->exports_.sync_host();
7536  Teuchos::ArrayView<const packet_type> hostExports =
7537  getArrayViewFromDualView(destGraph->exports_);
7538  Distor.doReversePostsAndWaits(hostExports,
7539  numExportPacketsPerLID,
7540  hostImports,
7541  numImportPacketsPerLID);
7542  }
7543  else { // constant number of packets per LI
7544  destGraph->imports_.modify_host();
7545  Teuchos::ArrayView<packet_type> hostImports =
7546  getArrayViewFromDualView(destGraph->imports_);
7547  // This is a legacy host pack/unpack path, so use the host
7548  // version of exports_.
7549  destGraph->exports_.sync_host();
7550  Teuchos::ArrayView<const packet_type> hostExports =
7551  getArrayViewFromDualView(destGraph->exports_);
7552  Distor.doReversePostsAndWaits(hostExports,
7553  constantNumPackets,
7554  hostImports);
7555  }
7556  }
7557  else { // forward mode (the default)
7558  if (constantNumPackets == 0) { // variable number of packets per LID
7559  // Make sure that host has the latest version, since we're
7560  // using the version on host. If host has the latest
7561  // version, syncing to host does nothing.
7562  destGraph->numExportPacketsPerLID_.sync_host();
7563  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7564  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7565  destGraph->numImportPacketsPerLID_.sync_host();
7566  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7567  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7568  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7569  numImportPacketsPerLID);
7570  size_t totalImportPackets = 0;
7571  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7572  totalImportPackets += numImportPacketsPerLID[i];
7573  }
7574 
7575  // Reallocation MUST go before setting the modified flag,
7576  // because it may clear out the flags.
7577  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7578  destGraph->imports_.modify_host();
7579  Teuchos::ArrayView<packet_type> hostImports =
7580  getArrayViewFromDualView(destGraph->imports_);
7581  // This is a legacy host pack/unpack path, so use the host
7582  // version of exports_.
7583  destGraph->exports_.sync_host();
7584  Teuchos::ArrayView<const packet_type> hostExports =
7585  getArrayViewFromDualView(destGraph->exports_);
7586  Distor.doPostsAndWaits(hostExports,
7587  numExportPacketsPerLID,
7588  hostImports,
7589  numImportPacketsPerLID);
7590  }
7591  else { // constant number of packets per LID
7592  destGraph->imports_.modify_host();
7593  Teuchos::ArrayView<packet_type> hostImports =
7594  getArrayViewFromDualView(destGraph->imports_);
7595  // This is a legacy host pack/unpack path, so use the host
7596  // version of exports_.
7597  destGraph->exports_.sync_host();
7598  Teuchos::ArrayView<const packet_type> hostExports =
7599  getArrayViewFromDualView(destGraph->exports_);
7600  Distor.doPostsAndWaits(hostExports,
7601  constantNumPackets,
7602  hostImports);
7603  }
7604  }
7605  }
7606 
7607  /*********************************************************************/
7608  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7609  /*********************************************************************/
7610 
7611 #ifdef HAVE_TPETRA_MMM_TIMINGS
7612  MM = Teuchos::null;
7613  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7614 #endif
7615 
7616  // Backwards compatibility measure. We'll use this again below.
7617  destGraph->numImportPacketsPerLID_.sync_host();
7618  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7619  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7620  destGraph->imports_.sync_host();
7621  Teuchos::ArrayView<const packet_type> hostImports =
7622  getArrayViewFromDualView(destGraph->imports_);
7623  size_t mynnz =
7624  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7625  numImportPacketsPerLID,
7626  constantNumPackets, Distor, INSERT,
7627  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7628  size_t N = BaseRowMap->getNodeNumElements();
7629 
7630  // Allocations
7631  ArrayRCP<size_t> CSR_rowptr(N+1);
7632  ArrayRCP<GO> CSR_colind_GID;
7633  ArrayRCP<LO> CSR_colind_LID;
7634  CSR_colind_GID.resize(mynnz);
7635 
7636  // If LO and GO are the same, we can reuse memory when
7637  // converting the column indices from global to local indices.
7638  if (typeid(LO) == typeid(GO)) {
7639  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7640  }
7641  else {
7642  CSR_colind_LID.resize(mynnz);
7643  }
7644 
7645  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7646  // unpackAndCombine method on a "CrsArrays" object? This passing
7647  // in a huge list of arrays is icky. Can't we have a bit of an
7648  // abstraction? Implementing a concrete DistObject subclass only
7649  // takes five methods.
7650  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7651  numImportPacketsPerLID, constantNumPackets,
7652  Distor, INSERT, NumSameIDs, PermuteToLIDs,
7653  PermuteFromLIDs, N, mynnz, MyPID,
7654  CSR_rowptr(), CSR_colind_GID(),
7655  SourcePids(), TargetPids);
7656 
7657  /**************************************************************/
7658  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7659  /**************************************************************/
7660 #ifdef HAVE_TPETRA_MMM_TIMINGS
7661  MM = Teuchos::null;
7662  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7663 #endif
7664  // Call an optimized version of makeColMap that avoids the
7665  // Directory lookups (since the Import object knows who owns all
7666  // the GIDs).
7667  Teuchos::Array<int> RemotePids;
7668  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7669  CSR_colind_LID(),
7670  CSR_colind_GID(),
7671  BaseDomainMap,
7672  TargetPids, RemotePids,
7673  MyColMap);
7674 
7675  /*******************************************************/
7676  /**** 4) Second communicator restriction phase ****/
7677  /*******************************************************/
7678  if (restrictComm) {
7679  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7680  ReducedRowMap :
7681  MyColMap->replaceCommWithSubset(ReducedComm);
7682  MyColMap = ReducedColMap; // Reset the "my" maps
7683  }
7684 
7685  // Replace the col map
7686  destGraph->replaceColMap(MyColMap);
7687 
7688  // Short circuit if the processor is no longer in the communicator
7689  //
7690  // NOTE: Epetra modifies all "removed" processes so they
7691  // have a dummy (serial) Map that doesn't touch the original
7692  // communicator. Duplicating that here might be a good idea.
7693  if (ReducedComm.is_null()) {
7694  return;
7695  }
7696 
7697  /***************************************************/
7698  /**** 5) Sort ****/
7699  /***************************************************/
7700  if ((! reverseMode && xferAsImport != nullptr) ||
7701  (reverseMode && xferAsExport != nullptr)) {
7702  Import_Util::sortCrsEntries(CSR_rowptr(),
7703  CSR_colind_LID());
7704  }
7705  else if ((! reverseMode && xferAsExport != nullptr) ||
7706  (reverseMode && xferAsImport != nullptr)) {
7707  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7708  CSR_colind_LID());
7709  if (CSR_rowptr[N] != mynnz) {
7710  CSR_colind_LID.resize(CSR_rowptr[N]);
7711  }
7712  }
7713  else {
7714  TEUCHOS_TEST_FOR_EXCEPTION(
7715  true, std::logic_error,
7716  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7717  }
7718  /***************************************************/
7719  /**** 6) Reset the colmap and the arrays ****/
7720  /***************************************************/
7721 
7722  // Call constructor for the new graph (restricted as needed)
7723  //
7724  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7725 
7726  /***************************************************/
7727  /**** 7) Build Importer & Call ESFC ****/
7728  /***************************************************/
7729  // Pre-build the importer using the existing PIDs
7730  Teuchos::ParameterList esfc_params;
7731 #ifdef HAVE_TPETRA_MMM_TIMINGS
7732  MM = Teuchos::null;
7733  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7734 #endif
7735  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7736 #ifdef HAVE_TPETRA_MMM_TIMINGS
7737  MM = Teuchos::null;
7738  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7739 
7740  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7741 #endif
7742  if(!params.is_null())
7743  esfc_params.set("compute global constants",params->get("compute global constants",true));
7744 
7745  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7746  MyImport, Teuchos::null, rcp(&esfc_params,false));
7747 
7748  }
7749 
// Forwarding overload that takes a single Import object.
//
// NOTE(review): listing lines 7752-7753 (the qualified method name and the
// first parameter) are absent from this listing. The body refers to
// `destGraph`, so that is presumably the missing first parameter, and the
// method is presumably importAndFillComplete -- confirm against the full
// source file.
//
// Parameters visible here:
//   importer  - Import object handed to transferAndFillComplete() as its
//               second argument.
//   domainMap - forwarded unchanged.
//   rangeMap  - forwarded unchanged.
//   params    - forwarded unchanged.
//
// The third argument of transferAndFillComplete() is Teuchos::null, i.e. no
// separate domain transfer object is supplied by this overload.
7750  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7751  void
7754  const import_type& importer,
7755  const Teuchos::RCP<const map_type>& domainMap,
7756  const Teuchos::RCP<const map_type>& rangeMap,
7757  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7758  {
      // All of the work happens in transferAndFillComplete().
7759  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7760  }
7761 
// Forwarding overload that takes separate row and domain Import objects.
//
// NOTE(review): listing lines 7764-7765 (the qualified method name and the
// first parameter) are absent from this listing. The body refers to
// `destGraph`, so that is presumably the missing first parameter, and the
// method is presumably importAndFillComplete -- confirm against the full
// source file.
//
// Parameters visible here:
//   rowImporter    - Import object handed to transferAndFillComplete() as its
//                    second argument.
//   domainImporter - Import object handed to transferAndFillComplete() as its
//                    third argument, wrapped with Teuchos::rcpFromRef.
//   domainMap      - forwarded unchanged.
//   rangeMap       - forwarded unchanged.
//   params         - forwarded unchanged.
7762  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7763  void
7766  const import_type& rowImporter,
7767  const import_type& domainImporter,
7768  const Teuchos::RCP<const map_type>& domainMap,
7769  const Teuchos::RCP<const map_type>& rangeMap,
7770  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7771  {
      // NOTE(review): rcpFromRef is understood to produce a non-owning RCP, so
      // `domainImporter` must stay alive for the duration of this call --
      // confirm against the Teuchos documentation.
7772  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7773  }
7774 
// Forwarding overload that takes a single Export object.
//
// NOTE(review): listing lines 7777-7778 (the qualified method name and the
// first parameter) are absent from this listing. The body refers to
// `destGraph`, so that is presumably the missing first parameter, and the
// method is presumably exportAndFillComplete -- confirm against the full
// source file.
//
// Parameters visible here:
//   exporter  - Export object handed to transferAndFillComplete() as its
//               second argument.
//   domainMap - forwarded unchanged.
//   rangeMap  - forwarded unchanged.
//   params    - forwarded unchanged.
//
// The third argument of transferAndFillComplete() is Teuchos::null, i.e. no
// separate domain transfer object is supplied by this overload.
7775  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7776  void
7779  const export_type& exporter,
7780  const Teuchos::RCP<const map_type>& domainMap,
7781  const Teuchos::RCP<const map_type>& rangeMap,
7782  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7783  {
      // All of the work happens in transferAndFillComplete().
7784  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7785  }
7786 
// Forwarding overload that takes separate row and domain Export objects.
//
// NOTE(review): listing lines 7789-7790 (the qualified method name and the
// first parameter) are absent from this listing. The body refers to
// `destGraph`, so that is presumably the missing first parameter, and the
// method is presumably exportAndFillComplete -- confirm against the full
// source file.
//
// Parameters visible here:
//   rowExporter    - Export object handed to transferAndFillComplete() as its
//                    second argument.
//   domainExporter - Export object handed to transferAndFillComplete() as its
//                    third argument, wrapped with Teuchos::rcpFromRef.
//   domainMap      - forwarded unchanged.
//   rangeMap       - forwarded unchanged.
//   params         - forwarded unchanged.
7787  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7788  void
7791  const export_type& rowExporter,
7792  const export_type& domainExporter,
7793  const Teuchos::RCP<const map_type>& domainMap,
7794  const Teuchos::RCP<const map_type>& rangeMap,
7795  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7796  {
      // NOTE(review): rcpFromRef is understood to produce a non-owning RCP, so
      // `domainExporter` must stay alive for the duration of this call --
      // confirm against the Teuchos documentation.
7797  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7798  }
7799 
7800 
// Member-wise exchange of state between `graph` and *this.
//
// Each data member named below is exchanged with its counterpart in `graph`
// through std::swap; nothing else is done here (no communication, no
// recomputation of cached quantities).
//
// NOTE(review): listing lines 7803-7804 (the qualified method name and the
// parameter list) are absent from this listing. The body refers to `graph`,
// which is presumably the sole parameter, and the method is presumably
// CrsGraph::swap -- confirm against the full source file.
//
// NOTE(review): only the members named below are exchanged. Whether that
// covers every data member of the class (including any inherited state)
// cannot be confirmed from this listing.
7801  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7802  void
7805  {
      // Row, column, range and domain Map members.
7806  std::swap(graph.rowMap_, this->rowMap_);
7807  std::swap(graph.colMap_, this->colMap_);
7808  std::swap(graph.rangeMap_, this->rangeMap_);
7809  std::swap(graph.domainMap_, this->domainMap_);
7810 
      // Importer and exporter members.
7811  std::swap(graph.importer_, this->importer_);
7812  std::swap(graph.exporter_, this->exporter_);
7813 
      // Local graph member.
7814  std::swap(graph.lclGraph_, this->lclGraph_);
7815 
      // Per-process ("node") cached counts.
7816  std::swap(graph.nodeNumDiags_, this->nodeNumDiags_);
7817  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7818 
      // Global cached counts.
7819  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7820  std::swap(graph.globalNumDiags_, this->globalNumDiags_);
7821  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7822 
      // Profile type and the all-rows allocation size.
7823  std::swap(graph.pftype_, this->pftype_);
7824 
7825  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7826 
      // Row pointer member.
7827  std::swap(graph.k_rowPtrs_, this->k_rowPtrs_);
7828 
      // 1-D local and global index members.
7829  std::swap(graph.k_lclInds1D_, this->k_lclInds1D_);
7830  std::swap(graph.k_gblInds1D_, this->k_gblInds1D_);
7831 
      // 2-D local and global index members.
7832  std::swap(graph.lclInds2D_, this->lclInds2D_);
7833  std::swap(graph.gblInds2D_, this->gblInds2D_);
7834 
      // Storage status.
7835  std::swap(graph.storageStatus_, this->storageStatus_);
7836 
      // State flags.
7837  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7838  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7839  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7840  std::swap(graph.fillComplete_, this->fillComplete_);
7841  std::swap(graph.lowerTriangular_, this->lowerTriangular_);
7842  std::swap(graph.upperTriangular_, this->upperTriangular_);
7843  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7844  std::swap(graph.noRedundancies_, this->noRedundancies_);
7845  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7846  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7847 
7848  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7849 
      // Per-row allocation and entry counts, then the nonlocals_ container.
7850  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7851  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7852  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7853  }
7854 
7855 
7856  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7857  bool
7860  {
7861  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7862  bool output = true;
7863  output = m1.size() == m2.size() ? output : false;
7864  for(auto & it_m: m1)
7865  {
7866  size_t key = it_m.first;
7867  output = m2.find(key) != m2.end() ? output : false;
7868  if(output)
7869  {
7870  auto v1 = m1.find(key)->second;
7871  auto v2 = m2.find(key)->second;
7872  std::sort(v1.begin(), v1.end());
7873  std::sort(v2.begin(), v2.end());
7874 
7875  output = v1.size() == v2.size() ? output : false;
7876  for(size_t i=0; output && i<v1.size(); i++)
7877  {
7878  output = v1[i]==v2[i] ? output : false;
7879  }
7880  }
7881  }
7882  return output;
7883  };
7884 
7885  bool output = true;
7886 
7887  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7888  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7889  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7890  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7891 
7892  output = this->nodeNumDiags_ == graph.nodeNumDiags_ ? output : false;
7893  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7894 
7895  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7896  output = this->globalNumDiags_ == graph.globalNumDiags_ ? output : false;
7897  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7898 
7899  output = this->pftype_ == graph.pftype_ ? output : false; // ProfileType is a enum (scalar)
7900 
7901  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7902 
7903  output = this->lclInds2D_ == graph.lclInds2D_ ? output : false; // Teuchos::Array has == overloaded
7904  output = this->gblInds2D_ == graph.gblInds2D_ ? output : false; // Teuchos::Array has == overloaded
7905 
7906  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7907 
7908  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7909  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7910  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7911  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7912  output = this->lowerTriangular_ == graph.lowerTriangular_ ? output : false;
7913  output = this->upperTriangular_ == graph.upperTriangular_ ? output : false;
7914  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7915  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7916  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7917  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7918  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7919 
7920  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7921  // nonlocals_ isa std::map<GO, std::vector<GO> >
7922  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7923 
7924  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7925  // - since this is a HostMirror type, it should be in host memory already
7926  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7927  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7928  {
7929  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7930  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7931  }
7932 
7933  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7934  // - since this is a HostMirror type, it should be in host memory already
7935  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7936  if(output && this->k_numRowEntries_.extent(0) > 0)
7937  {
7938  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7939  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7940  }
7941 
7942  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7943  output = this->k_rowPtrs_.extent(0) == graph.k_rowPtrs_.extent(0) ? output : false;
7944  if(output && this->k_rowPtrs_.extent(0) > 0)
7945  {
7946  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_this = Kokkos::create_mirror_view(this->k_rowPtrs_);
7947  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrs_);
7948  Kokkos::deep_copy(k_rowPtrs_host_this, this->k_rowPtrs_);
7949  Kokkos::deep_copy(k_rowPtrs_host_graph, graph.k_rowPtrs_);
7950  for(size_t i=0; output && i<k_rowPtrs_host_this.extent(0); i++)
7951  output = k_rowPtrs_host_this(i) == k_rowPtrs_host_graph(i) ? output : false;
7952  }
7953 
7954  // Compare k_lclInds1D_ isa Kokkos::View<LocalOrdinal*, ...>
7955  output = this->k_lclInds1D_.extent(0) == graph.k_lclInds1D_.extent(0) ? output : false;
7956  if(output && this->k_lclInds1D_.extent(0) > 0)
7957  {
7958  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_this = Kokkos::create_mirror_view(this->k_lclInds1D_);
7959  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_graph= Kokkos::create_mirror_view(graph.k_lclInds1D_);
7960  Kokkos::deep_copy(k_lclInds1D_host_this, this->k_lclInds1D_);
7961  Kokkos::deep_copy(k_lclInds1D_host_graph, graph.k_lclInds1D_);
7962  for(size_t i=0; output && i < k_lclInds1D_host_this.extent(0); i++)
7963  output = k_lclInds1D_host_this(i) == k_lclInds1D_host_graph(i) ? output : false;
7964  }
7965 
7966  // Compare k_gblInds1D_ isa Kokkos::View<GlobalOrdinal*, ...>
7967  output = this->k_gblInds1D_.extent(0) == graph.k_gblInds1D_.extent(0) ? output : false;
7968  if(output && this->k_gblInds1D_.extent(0) > 0)
7969  {
7970  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_this = Kokkos::create_mirror_view(this->k_gblInds1D_);
7971  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_graph = Kokkos::create_mirror_view(graph.k_gblInds1D_);
7972  Kokkos::deep_copy(k_gblInds1D_host_this, this->k_gblInds1D_);
7973  Kokkos::deep_copy(k_gblInds1D_host_graph, graph.k_gblInds1D_);
7974  for(size_t i=0; output && i<k_gblInds1D_host_this.extent(0); i++)
7975  output = k_gblInds1D_host_this(i) == k_gblInds1D_host_graph(i) ? output : false;
7976  }
7977 
7978  // Check lclGraph_ // isa Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7979  // Kokkos::StaticCrsGraph has 3 data members in it:
7980  // Kokkos::View<size_type*, ...> row_map (local_graph_type::row_map_type)
7981  // Kokkos::View<data_type*, ...> entries (local_graph_type::entries_type)
7982  // Kokkos::View<size_type*, ...> row_block_offsets (local_graph_type::row_block_type)
7983  // There is currently no Kokkos::StaticCrsGraph comparison function that's built-in, so we will just compare
7984  // the three data items here. This can be replaced if Kokkos ever puts in its own comparison routine.
7985  output = this->lclGraph_.row_map.extent(0) == graph.lclGraph_.row_map.extent(0) ? output : false;
7986  if(output && this->lclGraph_.row_map.extent(0) > 0)
7987  {
7988  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_map);
7989  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_map);
7990  Kokkos::deep_copy(lclGraph_rowmap_host_this, this->lclGraph_.row_map);
7991  Kokkos::deep_copy(lclGraph_rowmap_host_graph, graph.lclGraph_.row_map);
7992  for(size_t i=0; output && i<lclGraph_rowmap_host_this.extent(0); i++)
7993  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i) ? output : false;
7994  }
7995 
7996  output = this->lclGraph_.entries.extent(0) == graph.lclGraph_.entries.extent(0) ? output : false;
7997  if(output && this->lclGraph_.entries.extent(0) > 0)
7998  {
7999  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_this = Kokkos::create_mirror_view(this->lclGraph_.entries);
8000  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.entries);
8001  Kokkos::deep_copy(lclGraph_entries_host_this, this->lclGraph_.entries);
8002  Kokkos::deep_copy(lclGraph_entries_host_graph, graph.lclGraph_.entries);
8003  for(size_t i=0; output && i<lclGraph_entries_host_this.extent(0); i++)
8004  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i) ? output : false;
8005  }
8006 
8007  output = this->lclGraph_.row_block_offsets.extent(0) == graph.lclGraph_.row_block_offsets.extent(0) ? output : false;
8008  if(output && this->lclGraph_.row_block_offsets.extent(0) > 0)
8009  {
8010  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_block_offsets);
8011  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_block_offsets);
8012  Kokkos::deep_copy(lclGraph_rbo_host_this, this->lclGraph_.row_block_offsets);
8013  Kokkos::deep_copy(lclGraph_rbo_host_graph, graph.lclGraph_.row_block_offsets);
8014  for(size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
8015  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i) ? output : false;
8016  }
8017 
8018  // For the Importer and Exporter, we shouldn't need to explicitly check them since
8019  // they will be consistent with the maps.
8020  // Note: importer_ isa Teuchos::RCP<const import_type>
8021  // exporter_ isa Teuchos::RCP<const export_type>
8022 
8023  return output;
8024  }
8025 
8026 
8027 
8028 } // namespace Tpetra
8029 
8030 //
8031 // Explicit instantiation macros
8032 //
8033 // Must be expanded from within the Tpetra namespace!
8034 //
// TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE)
//
// Expands to `template class CrsGraph< LO , GO , NODE >;`, i.e. an explicit
// instantiation of the CrsGraph class template for the given local ordinal
// (LO), global ordinal (GO), and Node (NODE) types.  CrsGraph is named
// without namespace qualification, so the macro must be expanded where that
// name resolves (inside the Tpetra namespace).
8035 #define TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) \
8036  template class CrsGraph< LO , GO , NODE >;
8037 
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE)
//
// Expands to a body-less `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>.  This is the
// variant that takes a single Import object.  Parameters of the declared
// function, in order:
//   sourceGraph - RCP to the (const) source graph
//   importer    - Import parameterized on the graph's ordinal and node types
//   domainMap   - RCP to the (const) domain Map
//   rangeMap    - RCP to the (const) range Map
//   params      - RCP to a Teuchos::ParameterList
// The declared return type is Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// Import, Map and CrsGraph are named without namespace qualification.
8038 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8039  template<> \
8040  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8041  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8042  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8043  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8044  CrsGraph<LO,GO,NODE>::node_type>& importer, \
8045  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8046  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8047  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8048  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8049  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8050  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8051  const Teuchos::RCP<Teuchos::ParameterList>& params);
8052 
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
//
// Expands to a body-less `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the variant
// that takes two Import objects.  Parameters of the declared function, in
// order:
//   sourceGraph    - RCP to the (const) source graph
//   rowImporter    - first Import argument
//   domainImporter - second Import argument
//   domainMap      - RCP to the (const) domain Map
//   rangeMap       - RCP to the (const) range Map
//   params         - RCP to a Teuchos::ParameterList
// The declared return type is Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
8053 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8054  template<> \
8055  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8056  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8057  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8058  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8059  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
8060  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8061  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8062  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
8063  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8064  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8065  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8066  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8067  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8068  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8069  const Teuchos::RCP<Teuchos::ParameterList>& params);
8070 
8071 
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE)
//
// Expands to a body-less `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>.  This is the
// variant that takes a single Export object.  Parameters of the declared
// function, in order:
//   sourceGraph - RCP to the (const) source graph
//   exporter    - Export parameterized on the graph's ordinal and node types
//   domainMap   - RCP to the (const) domain Map
//   rangeMap    - RCP to the (const) range Map
//   params      - RCP to a Teuchos::ParameterList
// The declared return type is Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// Export, Map and CrsGraph are named without namespace qualification.
8072 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8073  template<> \
8074  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8075  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8076  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8077  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8078  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
8079  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8080  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8081  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8082  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8083  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8084  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8085  const Teuchos::RCP<Teuchos::ParameterList>& params);
8086 
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
//
// Expands to a body-less `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the variant
// that takes two Export objects.  Parameters of the declared function, in
// order:
//   sourceGraph    - RCP to the (const) source graph
//   rowExporter    - first Export argument
//   domainExporter - second Export argument
//   domainMap      - RCP to the (const) domain Map
//   rangeMap       - RCP to the (const) range Map
//   params         - RCP to a Teuchos::ParameterList
// The declared return type is Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
8087 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8088  template<> \
8089  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8090  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8091  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8092  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8093  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
8094  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8095  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8096  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
8097  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8098  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8099  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8100  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8101  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8102  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8103  const Teuchos::RCP<Teuchos::ParameterList>& params);
8104 
8105 
8106 // WARNING: These macros exist only for backwards compatibility.
8107 // We will remove them at some point.
// Each of the four macros below has an empty replacement list: invoking one
// with (S,LO,GO,NODE) expands to nothing, and all four arguments are
// discarded.  They are kept so that existing invocations still preprocess.
8108 #define TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
8109 #define TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
8110 #define TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE)
8111 #define TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
8112 
// TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE)
//
// Umbrella macro.  Expands, in order, to:
//   1. the four *_INSTANT(S,LO,GO,NODE) compatibility macros
//      (SORTROWINDICESANDVALUES, MERGEROWINDICESANDVALUES, ALLOCATEVALUES1D,
//      ALLOCATEVALUES2D), which are defined in this file with empty
//      replacement lists; and
//   2. the four import/export fill-complete macros, each invoked with
//      (LO,GO,NODE).
// S is forwarded only to the compatibility macros, so with the definitions
// in this file it has no effect on the resulting expansion.
// Note that TPETRA_CRSGRAPH_GRAPH_INSTANT is not invoked here.
8113 #define TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) \
8114  TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
8115  TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
8116  TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE) \
8117  TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE) \
8118  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8119  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8120  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8121  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
8122 
8123 
8124 #endif // TPETRA_CRSGRAPH_DEF_HPP
Teuchos::ArrayRCP< Teuchos::Array< local_ordinal_type > > lclInds2D_
Local column indices for all rows.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
void setAllIndices(const typename local_graph_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
bool lowerTriangular_
Whether the graph is locally lower triangular.
size_t nodeNumDiags_
Local number of (populated) diagonal entries.
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Kokkos::View< global_ordinal_type *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph and the given graph hold identical data (compared member by member).
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
global_size_t globalNumDiags_
Global number of (populated) diagonal entries.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
void getLocalRowView(const local_ordinal_type lclRow, Teuchos::ArrayView< const local_ordinal_type > &lclColInds) const override
Get a const, non-persisting view of the given local row's local column indices, as a Teuchos::ArrayVi...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool upperTriangular_
Whether the graph is locally upper triangular.
static bool debug()
Whether Tpetra is in debug mode.
size_t findLocalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const local_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
std::pair< size_t, std::string > makeIndicesLocal()
Convert column indices from global to local.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
::Tpetra::Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
std::string description() const override
Return a one-line human-readable description of this object.
local_ordinal_type getLocalViewRawConst(const local_ordinal_type *&lclInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the local column indices of a locally owned row, using the result of getRowInfo...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class' Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don't currently exist.
void getGlobalRowCopy(global_ordinal_type gblRow, const Teuchos::ArrayView< global_ordinal_type > &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph's global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::ArrayView< const global_ordinal_type > getGlobalView(const RowInfo &rowinfo) const
Get a const, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = getR...
Functions for manipulating CRS arrays.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
Teuchos::ArrayView< local_ordinal_type > getLocalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = ge...
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
ProfileType getProfileType() const
Returns the graph's profile type, i.e. whether it was allocated with static or dynamic profile.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
Sets up and executes a communication plan for a Tpetra DistObject.
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const size_t *, execution_space >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
LocalTriangularStructureResult< typename LocalMapType::local_ordinal_type > determineLocalTriangularStructure(const LocalGraphType &G, const LocalMapType &rowMap, const LocalMapType &colMap, const bool ignoreMapsForTriangularStructure)
Count the local number of diagonal entries in a local sparse graph, and determine whether the local p...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Declaration and definition of Tpetra::Details::determineLocalTriangularStructure. ...
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
void computeLocalConstants(const bool computeLocalTriangularConstants)
Compute local constants, if they have not yet been computed.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
typename device_type::execution_space execution_space
This class' Kokkos execution space.
Teuchos::ArrayRCP< Teuchos::Array< global_ordinal_type > > gblInds2D_
Global column indices for all rows.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Utility functions for packing and unpacking sparse matrix entries.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Abstract base class for objects that can be the source of an Import or Export operation.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType &count)
Compute offsets from a constant count.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
size_t getNodeAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
Teuchos::ArrayView< global_ordinal_type > getGlobalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = g...
ProfileType pftype_
Whether the graph was allocated with static or dynamic profile.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
void getGlobalRowView(const global_ordinal_type gblRow, Teuchos::ArrayView< const global_ordinal_type > &gblColInds) const override
Get a const, non-persisting view of the given global row's global column indices, as a Teuchos::Array...
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
void padCrsArrays(RowPtr &rowPtrBeg, RowPtr &rowPtrEnd, Indices &indices, const Padding &padding)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void computeGlobalConstants(const bool computeLocalTriangularConstants)
Compute global constants, if they have not yet been computed.
Teuchos::ArrayView< const local_ordinal_type > getLocalView(const RowInfo &rowinfo) const
Get a const, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = getRo...
A parallel distribution of indices over processes.
local_ordinal_type getGlobalViewRawConst(const global_ordinal_type *&gblInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the global column indices of a locally owned row, using the result of getRowInfoFrom...
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
typename Node::device_type device_type
This class' Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Teuchos::ArrayRCP< const local_ordinal_type > getNodePackedIndices() const
Get a Teuchos::ArrayRCP of the packed column indices.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const override
Pack this object's data for Import or Export.
void getLocalRowCopy(local_ordinal_type lclRow, const Teuchos::ArrayView< local_ordinal_type > &lclColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.