Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
50 
54 #include "Tpetra_Details_gathervPrint.hpp"
55 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
56 #include "Tpetra_Details_makeColMap.hpp"
60 #include "Tpetra_Distributor.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Tpetra_Vector.hpp"
63 #include "Tpetra_Import_Util.hpp"
64 #include "Tpetra_Import_Util2.hpp"
65 #include "Tpetra_Details_packCrsGraph.hpp"
66 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
68 #include <algorithm>
69 #include <limits>
70 #include <map>
71 #include <sstream>
72 #include <string>
73 #include <type_traits>
74 #include <utility>
75 #include <vector>
76 
77 namespace Tpetra {
78  namespace Details {
79  namespace Impl {
80 
81  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
82  class ConvertColumnIndicesFromGlobalToLocal {
83  public:
84  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
85  const ::Kokkos::View<const GO*, DT>& gblColInds,
86  const ::Kokkos::View<const OffsetType*, DT>& ptr,
87  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
88  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
89  lclColInds_ (lclColInds),
90  gblColInds_ (gblColInds),
91  ptr_ (ptr),
92  lclColMap_ (lclColMap),
93  numRowEnt_ (numRowEnt)
94  {}
95 
96  KOKKOS_FUNCTION void
97  operator () (const LO& lclRow, OffsetType& curNumBad) const
98  {
99  const OffsetType offset = ptr_(lclRow);
100  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
101  // of entries in a row to LO, as long as the row doesn't have
102  // too many duplicate entries.
103  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
104  for (LO j = 0; j < numEnt; ++j) {
105  const GO gid = gblColInds_(offset + j);
106  const LO lid = lclColMap_.getLocalElement (gid);
107  lclColInds_(offset + j) = lid;
108  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
109  ++curNumBad;
110  }
111  }
112  }
113 
114  static OffsetType
115  run (const ::Kokkos::View<LO*, DT>& lclColInds,
116  const ::Kokkos::View<const GO*, DT>& gblColInds,
117  const ::Kokkos::View<const OffsetType*, DT>& ptr,
118  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
119  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
120  {
121  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
122  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
123 
124  const LO lclNumRows = ptr.extent (0) == 0 ?
125  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
126  OffsetType numBad = 0;
127  // Count of "bad" column indices is a reduction over rows.
128  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
129  functor_type (lclColInds, gblColInds, ptr,
130  lclColMap, numRowEnt),
131  numBad);
132  return numBad;
133  }
134 
135  private:
136  ::Kokkos::View<LO*, DT> lclColInds_;
137  ::Kokkos::View<const GO*, DT> gblColInds_;
138  ::Kokkos::View<const OffsetType*, DT> ptr_;
140  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
141  };
142 
143  } // namespace Impl
144 
159  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
160  OffsetType
161  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
162  const Kokkos::View<const GO*, DT>& gblColInds,
163  const Kokkos::View<const OffsetType*, DT>& ptr,
164  const LocalMap<LO, GO, DT>& lclColMap,
165  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
166  {
167  using Impl::ConvertColumnIndicesFromGlobalToLocal;
168  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
169  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
170  }
171 
172  template<class ViewType, class LO>
173  class MaxDifference {
174  public:
175  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
176 
177  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
178  dst = 0;
179  }
180 
181  KOKKOS_INLINE_FUNCTION void
182  join (volatile LO& dst, const volatile LO& src) const
183  {
184  dst = (src > dst) ? src : dst;
185  }
186 
187  KOKKOS_INLINE_FUNCTION void
188  operator () (const LO lclRow, LO& maxNumEnt) const
189  {
190  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
191  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
192  }
193  private:
194  typename ViewType::const_type ptr_;
195  };
196 
197  template<class ViewType, class LO>
198  typename ViewType::non_const_value_type
199  maxDifference (const char kernelLabel[],
200  const ViewType& ptr,
201  const LO lclNumRows)
202  {
203  if (lclNumRows == 0) {
204  // mfh 07 May 2018: Weirdly, I need this special case,
205  // otherwise I get the wrong answer.
206  return static_cast<LO> (0);
207  }
208  else {
209  using execution_space = typename ViewType::execution_space;
210  using range_type = Kokkos::RangePolicy<execution_space, LO>;
211  LO theMaxNumEnt {0};
212  Kokkos::parallel_reduce (kernelLabel,
213  range_type (0, lclNumRows),
214  MaxDifference<ViewType, LO> (ptr),
215  theMaxNumEnt);
216  return theMaxNumEnt;
217  }
218  }
219 
220  } // namespace Details
221 
222  template <class LocalOrdinal, class GlobalOrdinal, class Node>
223  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
224  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
225  const size_t maxNumEntriesPerRow,
226  const ProfileType pftype,
227  const Teuchos::RCP<Teuchos::ParameterList>& params) :
228  dist_object_type (rowMap)
229  , rowMap_ (rowMap)
230  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
231  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
232  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
233  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
234  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
235  , pftype_ (pftype)
236  , numAllocForAllRows_ (maxNumEntriesPerRow)
237  , storageStatus_ (pftype == StaticProfile ?
238  ::Tpetra::Details::STORAGE_1D_UNPACKED :
239  ::Tpetra::Details::STORAGE_2D)
240  , indicesAreAllocated_ (false)
241  , indicesAreLocal_ (false)
242  , indicesAreGlobal_ (false)
243  , fillComplete_ (false)
244  , lowerTriangular_ (false)
245  , upperTriangular_ (false)
246  , indicesAreSorted_ (true)
247  , noRedundancies_ (true)
248  , haveLocalConstants_ (false)
249  , haveGlobalConstants_ (false)
250  , sortGhostsAssociatedWithEachProcessor_ (true)
251  {
252  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
253  "pftype,params): ";
254  staticAssertions ();
255  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
256  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
257  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
258  "a valid size_t value, which in this case means it must not be "
259  "Teuchos::OrdinalTraits<size_t>::invalid().");
260  resumeFill (params);
262  }
263 
264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
266  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
267  const Teuchos::RCP<const map_type>& colMap,
268  const size_t maxNumEntriesPerRow,
269  const ProfileType pftype,
270  const Teuchos::RCP<Teuchos::ParameterList>& params) :
271  dist_object_type (rowMap)
272  , rowMap_ (rowMap)
273  , colMap_ (colMap)
274  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
275  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
276  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
277  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
278  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
279  , pftype_ (pftype)
280  , numAllocForAllRows_ (maxNumEntriesPerRow)
281  , storageStatus_ (pftype == StaticProfile ?
282  ::Tpetra::Details::STORAGE_1D_UNPACKED :
283  ::Tpetra::Details::STORAGE_2D)
284  , indicesAreAllocated_ (false)
285  , indicesAreLocal_ (false)
286  , indicesAreGlobal_ (false)
287  , fillComplete_ (false)
288  , lowerTriangular_ (false)
289  , upperTriangular_ (false)
290  , indicesAreSorted_ (true)
291  , noRedundancies_ (true)
292  , haveLocalConstants_ (false)
293  , haveGlobalConstants_ (false)
294  , sortGhostsAssociatedWithEachProcessor_ (true)
295  {
296  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
297  "pftype,params): ";
298  staticAssertions ();
299  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
300  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
301  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
302  "a valid size_t value, which in this case means it must not be "
303  "Teuchos::OrdinalTraits<size_t>::invalid().");
304  resumeFill (params);
306  }
307 
308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
310  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
311  const Teuchos::ArrayView<const size_t>& numEntPerRow,
312  const ProfileType pftype,
313  const Teuchos::RCP<Teuchos::ParameterList>& params) :
314  dist_object_type (rowMap)
315  , rowMap_ (rowMap)
316  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
317  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
318  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
319  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
320  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
321  , pftype_ (pftype)
322  , numAllocForAllRows_ (0)
323  , storageStatus_ (pftype == StaticProfile ?
324  ::Tpetra::Details::STORAGE_1D_UNPACKED :
325  ::Tpetra::Details::STORAGE_2D)
326  , indicesAreAllocated_ (false)
327  , indicesAreLocal_ (false)
328  , indicesAreGlobal_ (false)
329  , fillComplete_ (false)
330  , lowerTriangular_ (false)
331  , upperTriangular_ (false)
332  , indicesAreSorted_ (true)
333  , noRedundancies_ (true)
334  , haveLocalConstants_ (false)
335  , haveGlobalConstants_ (false)
336  , sortGhostsAssociatedWithEachProcessor_ (true)
337  {
338  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
339  staticAssertions ();
340 
341  const size_t lclNumRows = rowMap.is_null () ?
342  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
344  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
345  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
346  << " != the local number of rows " << lclNumRows << " as specified by "
347  "the input row Map.");
348 
349  const bool debug = ::Tpetra::Details::Behavior::debug ();
350  if (debug) {
351  for (size_t r = 0; r < lclNumRows; ++r) {
352  const size_t curRowCount = numEntPerRow[r];
353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
355  std::invalid_argument, "numEntPerRow(" << r << ") "
356  "specifies an invalid number of entries "
357  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
358  }
359  }
360 
361  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
362  // The latter is a const View, so we have to copy into a nonconst
363  // View first, then assign.
364  typedef decltype (k_numAllocPerRow_) out_view_type;
365  typedef typename out_view_type::non_const_type nc_view_type;
366  typedef Kokkos::View<const size_t*,
367  typename nc_view_type::array_layout,
368  Kokkos::HostSpace,
369  Kokkos::MemoryUnmanaged> in_view_type;
370  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
371  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
372  lclNumRows);
373  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
374  k_numAllocPerRow_ = numAllocPerRowOut;
375 
376  resumeFill (params);
378  }
379 
380 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
383  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
384  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
385  const ProfileType pftype,
386  const Teuchos::RCP<Teuchos::ParameterList>& params) :
387  dist_object_type (rowMap)
388  , rowMap_ (rowMap)
389  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
390  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
391  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
392  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
393  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
394  , pftype_ (pftype)
395  , numAllocForAllRows_ (0)
396  , storageStatus_ (pftype == StaticProfile ?
397  ::Tpetra::Details::STORAGE_1D_UNPACKED :
398  ::Tpetra::Details::STORAGE_2D)
399  , indicesAreAllocated_ (false)
400  , indicesAreLocal_ (false)
401  , indicesAreGlobal_ (false)
402  , fillComplete_ (false)
403  , lowerTriangular_ (false)
404  , upperTriangular_ (false)
405  , indicesAreSorted_ (true)
406  , noRedundancies_ (true)
407  , haveLocalConstants_ (false)
408  , haveGlobalConstants_ (false)
409  , sortGhostsAssociatedWithEachProcessor_ (true)
410  {
411  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,"
412  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
413  staticAssertions ();
414 
415  const size_t lclNumRows = rowMap.is_null () ?
416  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
418  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
419  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
420  << " != the local number of rows " << lclNumRows << " as specified by "
421  "the input row Map.");
422 
423  const bool debug = ::Tpetra::Details::Behavior::debug ();
424  if (debug) {
425  for (size_t r = 0; r < lclNumRows; ++r) {
426  const size_t curRowCount = numEntPerRow[r];
427  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
428  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
429  std::invalid_argument, "numEntPerRow(" << r << ") "
430  "specifies an invalid number of entries "
431  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
432  }
433  }
434 
435  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
436  // The latter is a const View, so we have to copy into a nonconst
437  // View first, then assign.
438  typedef decltype (k_numAllocPerRow_) out_view_type;
439  typedef typename out_view_type::non_const_type nc_view_type;
440  typedef Kokkos::View<const size_t*,
441  typename nc_view_type::array_layout,
442  Kokkos::HostSpace,
443  Kokkos::MemoryUnmanaged> in_view_type;
444  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
445  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
446  lclNumRows);
447  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
448  k_numAllocPerRow_ = numAllocPerRowOut;
449 
450  resumeFill (params);
452  }
453 #endif // TPETRA_ENABLE_DEPRECATED_CODE
454 
455 
456  template <class LocalOrdinal, class GlobalOrdinal, class Node>
457  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
458  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
459  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
460  const ProfileType pftype,
461  const Teuchos::RCP<Teuchos::ParameterList>& params) :
462  dist_object_type (rowMap)
463  , rowMap_ (rowMap)
464  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
465  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
466  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
467  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
468  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
469  , pftype_ (pftype)
470  , k_numAllocPerRow_ (numEntPerRow.h_view)
471  , numAllocForAllRows_ (0)
472  , storageStatus_ (pftype == StaticProfile ?
473  ::Tpetra::Details::STORAGE_1D_UNPACKED :
474  ::Tpetra::Details::STORAGE_2D)
475  , indicesAreAllocated_ (false)
476  , indicesAreLocal_ (false)
477  , indicesAreGlobal_ (false)
478  , fillComplete_ (false)
479  , lowerTriangular_ (false)
480  , upperTriangular_ (false)
481  , indicesAreSorted_ (true)
482  , noRedundancies_ (true)
483  , haveLocalConstants_ (false)
484  , haveGlobalConstants_ (false)
486  {
487  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
488  staticAssertions ();
489 
490  const size_t lclNumRows = rowMap.is_null () ?
491  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
494  std::invalid_argument, "numEntPerRow has length " <<
495  numEntPerRow.extent (0) << " != the local number of rows " <<
496  lclNumRows << " as specified by " "the input row Map.");
497 
498  const bool debug = ::Tpetra::Details::Behavior::debug ();
499  if (debug) {
500  for (size_t r = 0; r < lclNumRows; ++r) {
501  const size_t curRowCount = numEntPerRow.h_view(r);
502  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
504  std::invalid_argument, "numEntPerRow(" << r << ") "
505  "specifies an invalid number of entries "
506  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
507  }
508  }
509 
510  resumeFill (params);
512  }
513 
514 
515  template <class LocalOrdinal, class GlobalOrdinal, class Node>
517  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
518  const Teuchos::RCP<const map_type>& colMap,
519  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
520  const ProfileType pftype,
521  const Teuchos::RCP<Teuchos::ParameterList>& params) :
522  dist_object_type (rowMap)
523  , rowMap_ (rowMap)
524  , colMap_ (colMap)
525  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
526  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
527  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
528  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
529  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
530  , pftype_ (pftype)
531  , k_numAllocPerRow_ (numEntPerRow.h_view)
532  , numAllocForAllRows_ (0)
533  , storageStatus_ (pftype == StaticProfile ?
534  ::Tpetra::Details::STORAGE_1D_UNPACKED :
535  ::Tpetra::Details::STORAGE_2D)
536  , indicesAreAllocated_ (false)
537  , indicesAreLocal_ (false)
538  , indicesAreGlobal_ (false)
539  , fillComplete_ (false)
540  , lowerTriangular_ (false)
541  , upperTriangular_ (false)
542  , indicesAreSorted_ (true)
543  , noRedundancies_ (true)
544  , haveLocalConstants_ (false)
545  , haveGlobalConstants_ (false)
546  , sortGhostsAssociatedWithEachProcessor_ (true)
547  {
548  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
549  staticAssertions ();
550 
551  const size_t lclNumRows = rowMap.is_null () ?
552  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
554  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
555  std::invalid_argument, "numEntPerRow has length " <<
556  numEntPerRow.extent (0) << " != the local number of rows " <<
557  lclNumRows << " as specified by " "the input row Map.");
558 
559  const bool debug = ::Tpetra::Details::Behavior::debug ();
560  if (debug) {
561  for (size_t r = 0; r < lclNumRows; ++r) {
562  const size_t curRowCount = numEntPerRow.h_view(r);
563  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
564  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
565  std::invalid_argument, "numEntPerRow(" << r << ") "
566  "specifies an invalid number of entries "
567  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
568  }
569  }
570 
571  resumeFill (params);
573  }
574 
575 
576  template <class LocalOrdinal, class GlobalOrdinal, class Node>
578  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
579  const Teuchos::RCP<const map_type>& colMap,
580  const Teuchos::ArrayView<const size_t>& numEntPerRow,
581  const ProfileType pftype,
582  const Teuchos::RCP<Teuchos::ParameterList>& params) :
583  dist_object_type (rowMap)
584  , rowMap_ (rowMap)
585  , colMap_ (colMap)
586  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
587  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
588  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
589  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
590  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
591  , pftype_ (pftype)
592  , numAllocForAllRows_ (0)
593  , storageStatus_ (pftype == StaticProfile ?
594  ::Tpetra::Details::STORAGE_1D_UNPACKED :
595  ::Tpetra::Details::STORAGE_2D)
596  , indicesAreAllocated_ (false)
597  , indicesAreLocal_ (false)
598  , indicesAreGlobal_ (false)
599  , fillComplete_ (false)
600  , lowerTriangular_ (false)
601  , upperTriangular_ (false)
602  , indicesAreSorted_ (true)
603  , noRedundancies_ (true)
604  , haveLocalConstants_ (false)
605  , haveGlobalConstants_ (false)
606  , sortGhostsAssociatedWithEachProcessor_ (true)
607  {
608  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
609  "params): ";
610  staticAssertions ();
611 
612  const size_t lclNumRows = rowMap.is_null () ?
613  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
615  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
616  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
617  << " != the local number of rows " << lclNumRows << " as specified by "
618  "the input row Map.");
619 
620  const bool debug = ::Tpetra::Details::Behavior::debug ();
621  if (debug) {
622  for (size_t r = 0; r < lclNumRows; ++r) {
623  const size_t curRowCount = numEntPerRow[r];
624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
625  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
626  std::invalid_argument, "numEntPerRow(" << r << ") "
627  "specifies an invalid number of entries "
628  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
629  }
630  }
631 
632  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
633  // The latter is a const View, so we have to copy into a nonconst
634  // View first, then assign.
635  typedef decltype (k_numAllocPerRow_) out_view_type;
636  typedef typename out_view_type::non_const_type nc_view_type;
637  typedef Kokkos::View<const size_t*,
638  typename nc_view_type::array_layout,
639  Kokkos::HostSpace,
640  Kokkos::MemoryUnmanaged> in_view_type;
641  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
642  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
643  lclNumRows);
644  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
645  k_numAllocPerRow_ = numAllocPerRowOut;
646 
647  resumeFill (params);
649  }
650 
651 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
652  template <class LocalOrdinal, class GlobalOrdinal, class Node>
654  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
655  const Teuchos::RCP<const map_type>& colMap,
656  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
657  const ProfileType pftype,
658  const Teuchos::RCP<Teuchos::ParameterList>& params) :
659  dist_object_type (rowMap)
660  , rowMap_ (rowMap)
661  , colMap_ (colMap)
662  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
663  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
664  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
665  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
666  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
667  , pftype_ (pftype)
668  , numAllocForAllRows_ (0)
669  , storageStatus_ (pftype == StaticProfile ?
670  ::Tpetra::Details::STORAGE_1D_UNPACKED :
671  ::Tpetra::Details::STORAGE_2D)
672  , indicesAreAllocated_ (false)
673  , indicesAreLocal_ (false)
674  , indicesAreGlobal_ (false)
675  , fillComplete_ (false)
676  , lowerTriangular_ (false)
677  , upperTriangular_ (false)
678  , indicesAreSorted_ (true)
679  , noRedundancies_ (true)
680  , haveLocalConstants_ (false)
681  , haveGlobalConstants_ (false)
682  , sortGhostsAssociatedWithEachProcessor_ (true)
683  {
684  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,RCP<const Map>,"
685  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
686  staticAssertions ();
687 
688  const size_t lclNumRows = rowMap.is_null () ?
689  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
691  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
692  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
693  << " != the local number of rows " << lclNumRows << " as specified by "
694  "the input row Map.");
695 
696  const bool debug = ::Tpetra::Details::Behavior::debug ();
697  if (debug) {
698  for (size_t r = 0; r < lclNumRows; ++r) {
699  const size_t curRowCount = numEntPerRow[r];
700  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
701  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
702  std::invalid_argument, "numEntPerRow(" << r << ") "
703  "specifies an invalid number of entries "
704  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
705  }
706  }
707 
708  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
709  // The latter is a const View, so we have to copy into a nonconst
710  // View first, then assign.
711  typedef decltype (k_numAllocPerRow_) out_view_type;
712  typedef typename out_view_type::non_const_type nc_view_type;
713  typedef Kokkos::View<const size_t*,
714  typename nc_view_type::array_layout,
715  Kokkos::HostSpace,
716  Kokkos::MemoryUnmanaged> in_view_type;
717  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
718  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
719  lclNumRows);
720  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
721  k_numAllocPerRow_ = numAllocPerRowOut;
722 
723  resumeFill (params);
725  }
726 #endif // TPETRA_ENABLE_DEPRECATED_CODE
727 
728  template <class LocalOrdinal, class GlobalOrdinal, class Node>
729  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
730  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
731  const Teuchos::RCP<const map_type>& colMap,
732  const typename local_graph_type::row_map_type& rowPointers,
733  const typename local_graph_type::entries_type::non_const_type& columnIndices,
734  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
735  dist_object_type (rowMap)
736  , rowMap_(rowMap)
737  , colMap_(colMap)
738  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
739  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
740  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
741  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
742  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
743  , pftype_(StaticProfile)
745  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
746  , indicesAreAllocated_(true)
747  , indicesAreLocal_(true)
748  , indicesAreGlobal_(false)
749  , fillComplete_(false)
750  , lowerTriangular_ (false)
751  , upperTriangular_ (false)
752  , indicesAreSorted_(true)
753  , noRedundancies_(true)
754  , haveLocalConstants_ (false)
755  , haveGlobalConstants_ (false)
757  {
758  staticAssertions ();
759  setAllIndices (rowPointers, columnIndices);
761  }
762 
763  template <class LocalOrdinal, class GlobalOrdinal, class Node>
765  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
766  const Teuchos::RCP<const map_type>& colMap,
767  const Teuchos::ArrayRCP<size_t>& rowPointers,
768  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
769  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
770  dist_object_type (rowMap)
771  , rowMap_ (rowMap)
772  , colMap_ (colMap)
773  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
774  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
775  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
776  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
777  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
778  , pftype_ (StaticProfile)
779  , numAllocForAllRows_ (0)
780  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
781  , indicesAreAllocated_ (true)
782  , indicesAreLocal_ (true)
783  , indicesAreGlobal_ (false)
784  , fillComplete_ (false)
785  , lowerTriangular_ (false)
786  , upperTriangular_ (false)
787  , indicesAreSorted_ (true)
788  , noRedundancies_ (true)
789  , haveLocalConstants_ (false)
790  , haveGlobalConstants_ (false)
791  , sortGhostsAssociatedWithEachProcessor_ (true)
792  {
793  staticAssertions ();
794  setAllIndices (rowPointers, columnIndices);
796  }
797 
798  template <class LocalOrdinal, class GlobalOrdinal, class Node>
800  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
801  const Teuchos::RCP<const map_type>& colMap,
802  const local_graph_type& k_local_graph_,
803  const Teuchos::RCP<Teuchos::ParameterList>& params)
804  : CrsGraph (k_local_graph_,
805  rowMap,
806  colMap,
807  Teuchos::null,
808  Teuchos::null,
809  params)
810  {}
811 
812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  CrsGraph (const local_graph_type& k_local_graph_,
815  const Teuchos::RCP<const map_type>& rowMap,
816  const Teuchos::RCP<const map_type>& colMap,
817  const Teuchos::RCP<const map_type>& domainMap,
818  const Teuchos::RCP<const map_type>& rangeMap,
819  const Teuchos::RCP<Teuchos::ParameterList>& params)
820  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
821  , rowMap_ (rowMap)
822  , colMap_ (colMap)
823  , lclGraph_ (k_local_graph_)
824  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
825  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
826  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
827  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
828  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
829  , pftype_ (StaticProfile)
830  , numAllocForAllRows_ (0)
831  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
832  , indicesAreAllocated_ (true)
833  , indicesAreLocal_ (true)
834  , indicesAreGlobal_ (false)
835  , fillComplete_ (false)
836  , lowerTriangular_ (false)
837  , upperTriangular_ (false)
838  , indicesAreSorted_ (true)
839  , noRedundancies_ (true)
840  , haveLocalConstants_ (false)
841  , haveGlobalConstants_ (false)
842  , sortGhostsAssociatedWithEachProcessor_ (true)
843  {
844  staticAssertions();
845  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
846 
847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848  colMap.is_null (), std::runtime_error,
849  ": The input column Map must be nonnull.");
850  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
851  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
852  std::runtime_error,
853  ": The input row Map and the input local graph need to have the same "
854  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
855  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
856  << " row(s).");
857  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
858  // rowMap_->getNodeNumElements(), but it doesn't have to.
859  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
860  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
861  // ": The input row Map and the input local graph need to have the same "
862  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
863  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
865  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
866  ": cannot have 1D data structures allocated.");
867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
868  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
869  ": cannot have 2D data structures allocated.");
870 
871  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
872  rangeMap .is_null() ? rowMap_ : rangeMap);
873  Teuchos::Array<int> remotePIDs (0); // unused output argument
874  this->makeImportExport (remotePIDs, false);
875 
876  k_lclInds1D_ = lclGraph_.entries;
877  k_rowPtrs_ = lclGraph_.row_map;
878 
879  const bool callComputeGlobalConstants = params.get () == nullptr ||
880  params->get ("compute global constants", true);
881  const bool computeLocalTriangularConstants = params.get () == nullptr ||
882  params->get ("compute local triangular constants", true);
883 
884  if (callComputeGlobalConstants) {
885  this->computeGlobalConstants (computeLocalTriangularConstants);
886  }
887  this->fillComplete_ = true;
888  this->checkInternalState ();
889  }
890 
891  template <class LocalOrdinal, class GlobalOrdinal, class Node>
893  CrsGraph (const local_graph_type& lclGraph,
894  const Teuchos::RCP<const map_type>& rowMap,
895  const Teuchos::RCP<const map_type>& colMap,
896  const Teuchos::RCP<const map_type>& domainMap,
897  const Teuchos::RCP<const map_type>& rangeMap,
898  const Teuchos::RCP<const import_type>& importer,
899  const Teuchos::RCP<const export_type>& exporter,
900  const Teuchos::RCP<Teuchos::ParameterList>& params) :
901  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
902  rowMap_ (rowMap),
903  colMap_ (colMap),
904  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
905  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
906  importer_ (importer),
907  exporter_ (exporter),
908  lclGraph_ (lclGraph),
909  nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ()),
910  nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ()),
911  globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
912  globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
913  globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
914  pftype_ (StaticProfile),
915  numAllocForAllRows_ (0),
916  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
917  indicesAreAllocated_ (true),
918  indicesAreLocal_ (true),
919  indicesAreGlobal_ (false),
920  fillComplete_ (false), // not yet, but see below
921  lowerTriangular_ (false),
922  upperTriangular_ (false),
923  indicesAreSorted_ (true),
924  noRedundancies_ (true),
925  haveLocalConstants_ (false),
926  haveGlobalConstants_ (false),
927  sortGhostsAssociatedWithEachProcessor_ (true)
928  {
929  staticAssertions();
930  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_type,"
931  "Map,Map,Map,Map,Import,Export,params): ";
932 
933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
934  (colMap.is_null (), std::runtime_error,
935  "The input column Map must be nonnull.");
936 
937  k_lclInds1D_ = lclGraph_.entries;
938  k_rowPtrs_ = lclGraph_.row_map;
939  const bool callComputeGlobalConstants =
940  params.get () == nullptr ||
941  params->get ("compute global constants", true);
942  const bool computeLocalTriangularConstants =
943  params.get () == nullptr ||
944  params->get ("compute local triangular constants", true);
945  if (callComputeGlobalConstants) {
946  this->computeGlobalConstants (computeLocalTriangularConstants);
947  }
948  fillComplete_ = true;
950  }
951 
952  template <class LocalOrdinal, class GlobalOrdinal, class Node>
953  Teuchos::RCP<const Teuchos::ParameterList>
956  {
957  using Teuchos::RCP;
958  using Teuchos::ParameterList;
959  using Teuchos::parameterList;
960 
961  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
962 
963  // Make a sublist for the Import.
964  RCP<ParameterList> importSublist = parameterList ("Import");
965 
966  // FIXME (mfh 02 Apr 2012) We should really have the Import and
967  // Export objects fill in these lists. However, we don't want to
968  // create an Import or Export unless we need them. For now, we
969  // know that the Import and Export just pass the list directly to
970  // their Distributor, so we can create a Distributor here
971  // (Distributor's constructor is a lightweight operation) and have
972  // it fill in the list.
973 
974  // Fill in Distributor default parameters by creating a
975  // Distributor and asking it to do the work.
976  Distributor distributor (rowMap_->getComm (), importSublist);
977  params->set ("Import", *importSublist, "How the Import performs communication.");
978 
979  // Make a sublist for the Export. For now, it's a clone of the
980  // Import sublist. It's not a shallow copy, though, since we
981  // might like the Import to do communication differently than the
982  // Export.
983  params->set ("Export", *importSublist, "How the Export performs communication.");
984 
985  return params;
986  }
987 
988  template <class LocalOrdinal, class GlobalOrdinal, class Node>
989  void
991  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
992  {
993  Teuchos::RCP<const Teuchos::ParameterList> validParams =
994  getValidParameters ();
995  params->validateParametersAndSetDefaults (*validParams);
996  this->setMyParamList (params);
997  }
998 
999  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1003  {
1004  return rowMap_->getGlobalNumElements ();
1005  }
1006 
1007  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1011  {
1012  const char tfecfFuncName[] = "getGlobalNumCols: ";
1013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1014  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
1015  "The graph does not have a domain Map. You may not call this method in "
1016  "that case.");
1017  return getDomainMap ()->getGlobalNumElements ();
1018  }
1019 
1020  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1021  size_t
1024  {
1025  return this->rowMap_.is_null () ?
1026  static_cast<size_t> (0) :
1027  this->rowMap_->getNodeNumElements ();
1028  }
1029 
1030  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031  size_t
1034  {
1035  const char tfecfFuncName[] = "getNodeNumCols: ";
1036  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1037  ! hasColMap (), std::runtime_error,
1038  "The graph does not have a column Map. You may not call this method "
1039  "unless the graph has a column Map. This requires either that a custom "
1040  "column Map was given to the constructor, or that fillComplete() has "
1041  "been called.");
1042  return colMap_.is_null () ? static_cast<size_t> (0) :
1043  colMap_->getNodeNumElements ();
1044  }
1045 
1046 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  global_size_t TPETRA_DEPRECATED
1050  getGlobalNumDiags () const
1051  {
1052  return this->getGlobalNumDiagsImpl ();
1053  }
1054 
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  size_t TPETRA_DEPRECATED
1057  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1058  getNodeNumDiags () const
1059  {
1060  return this->getNodeNumDiagsImpl ();
1061  }
1062 
1063  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1065  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1066  getGlobalNumDiagsImpl () const
1067  {
1068  const char tfecfFuncName[] = "getGlobalNumDiags: ";
1069  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1070  (! this->haveGlobalConstants_, std::logic_error,
1071  "The graph does not have global constants computed, "
1072  "but the user has requested them.");
1073 
1074  return globalNumDiags_;
1075  }
1076 
1077  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1078  size_t
1079  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1080  getNodeNumDiagsImpl () const
1081  {
1082  return nodeNumDiags_;
1083  }
1084 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1085 
1086 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1087  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1088  TPETRA_DEPRECATED
1089  Teuchos::RCP<Node>
1090  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1091  getNode () const
1092  {
1093  return Teuchos::null;
1094  }
1095 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1096 
1097  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1098  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1100  getRowMap () const
1101  {
1102  return rowMap_;
1103  }
1104 
1105  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1106  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1108  getColMap () const
1109  {
1110  return colMap_;
1111  }
1112 
1113  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1114  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1117  {
1118  return domainMap_;
1119  }
1120 
1121  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1122  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1125  {
1126  return rangeMap_;
1127  }
1128 
1129  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1130  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
1133  {
1134  return importer_;
1135  }
1136 
1137  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1138  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
1141  {
1142  return exporter_;
1143  }
1144 
1145  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1146  bool
1148  hasColMap () const
1149  {
1150  return ! colMap_.is_null ();
1151  }
1152 
1153  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1154  bool
1157  {
1158  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
1159  // getNodeNumRows() is zero?
1160 
1161  const bool isOpt = indicesAreAllocated_ &&
1162  k_numRowEntries_.extent (0) == 0 &&
1163  getNodeNumRows () > 0;
1164 
1165  const char tfecfFuncName[] = "isStorageOptimized: ";
1166  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1167  (isOpt && getProfileType () != StaticProfile, std::logic_error,
1168  "The matrix claims to have optimized storage, but getProfileType() "
1169  "returns DynamicProfile. This should never happen. Please report this "
1170  "bug to the Tpetra developers.");
1171 
1172  return isOpt;
1173  }
1174 
1175  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1176  ProfileType
1179  {
1180  return pftype_;
1181  }
1182 
1183  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1187  {
1188  const char tfecfFuncName[] = "getGlobalNumEntries: ";
1189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1190  (! this->haveGlobalConstants_, std::logic_error,
1191  "The graph does not have global constants computed, "
1192  "but the user has requested them.");
1193 
1194  return globalNumEntries_;
1195  }
1196 
1197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1198  size_t
1201  {
1202  typedef LocalOrdinal LO;
1203 
1204  if (this->indicesAreAllocated_) {
1205  const LO lclNumRows = this->getNodeNumRows ();
1206  if (lclNumRows == 0) {
1207  return static_cast<size_t> (0);
1208  }
1209  else {
1210  // Avoid the "*this capture" issue by creating a local Kokkos::View.
1211  auto numEntPerRow = this->k_numRowEntries_;
1212  const LO numNumEntPerRow = numEntPerRow.extent (0);
1213  if (numNumEntPerRow == 0) {
1214  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1215  static_cast<LO> (lclNumRows + 1)) {
1216  return static_cast<size_t> (0);
1217  }
1218  else {
1219  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1220  }
1221  }
1222  else { // k_numRowEntries_ is populated
1223  // k_numRowEntries_ is actually be a host View, so we run
1224  // the sum in its native execution space. This also means
1225  // that we can use explicit capture (which could perhaps
1226  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1227  // any CUDA build issues with trying to run a __device__ -
1228  // only function on host.
1229  typedef typename num_row_entries_type::execution_space
1230  host_exec_space;
1231  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1232 
1233  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1234  lclNumRows :
1235  numNumEntPerRow;
1236  size_t nodeNumEnt = 0;
1237  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1238  range_type (0, upperLoopBound),
1239  [=] (const LO& k, size_t& lclSum) {
1240  lclSum += numEntPerRow(k);
1241  }, nodeNumEnt);
1242  return nodeNumEnt;
1243  }
1244  }
1245  }
1246  else { // nothing allocated on this process, so no entries
1247  return static_cast<size_t> (0);
1248  }
1249  }
1250 
1251  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1255  {
1256  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1258  (! this->haveGlobalConstants_, std::logic_error,
1259  "The graph does not have global constants computed, "
1260  "but the user has requested them.");
1261 
1262  return globalMaxNumRowEntries_;
1263  }
1264 
1265  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1266  size_t
1269  {
1270  return nodeMaxNumRowEntries_;
1271  }
1272 
1273  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1274  bool
1277  {
1278  return fillComplete_;
1279  }
1280 
1281  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1282  bool
1285  {
1286  return ! fillComplete_;
1287  }
1288 
1289 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1290  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1291  bool TPETRA_DEPRECATED
1293  isLowerTriangular () const
1294  {
1295  return this->isLowerTriangularImpl ();
1296  }
1297 
1298  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1299  bool TPETRA_DEPRECATED
1300  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1301  isUpperTriangular () const
1302  {
1303  return this->isUpperTriangularImpl ();
1304  }
1305 
1306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1307  bool
1308  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1309  isLowerTriangularImpl () const
1310  {
1311  return this->lowerTriangular_;
1312  }
1313 
1314  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1315  bool
1316  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1317  isUpperTriangularImpl () const
1318  {
1319  return this->upperTriangular_;
1320  }
1321 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1322 
1323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1324  bool
1327  {
1328  return indicesAreLocal_;
1329  }
1330 
1331  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1332  bool
1335  {
1336  return indicesAreGlobal_;
1337  }
1338 
1339  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1340  size_t
1343  {
1344  typedef LocalOrdinal LO;
1345 
1346  if (this->indicesAreAllocated_) {
1347  const LO lclNumRows = this->getNodeNumRows ();
1348  if (lclNumRows == 0) {
1349  return static_cast<size_t> (0);
1350  }
1351  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED) {
1352  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1353  static_cast<LO> (lclNumRows + 1)) {
1354  return static_cast<size_t> (0);
1355  }
1356  else {
1357  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1358  }
1359  }
1360  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) {
1361  if (this->k_rowPtrs_.extent (0) == 0) {
1362  return static_cast<size_t> (0);
1363  }
1364  else {
1365  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1366  }
1367  }
1368  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_2D) {
1369  size_t numAllocated = 0;
1370  if (this->isLocallyIndexed ()) {
1371  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1372  numAllocated += this->lclInds2D_[lclRow].size ();
1373  }
1374  }
1375  else if (this->isGloballyIndexed ()) {
1376  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1377  numAllocated += this->gblInds2D_[lclRow].size ();
1378  }
1379  }
1380  // Neither locally nor globally indexed, means no indices allocated.
1381  return numAllocated;
1382  }
1383  else {
1384  return static_cast<size_t> (0);
1385  }
1386  }
1387  else {
1388  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1389  }
1390  }
1391 
1392  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1393  Teuchos::RCP<const Teuchos::Comm<int> >
1395  getComm () const
1396  {
1397  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1398  }
1399 
1400  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1401  GlobalOrdinal
1404  {
1405  return rowMap_->getIndexBase ();
1406  }
1407 
1408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1409  bool
1411  indicesAreAllocated () const
1412  {
1413  return indicesAreAllocated_;
1414  }
1415 
1416  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1417  bool
1419  isSorted () const
1420  {
1421  return indicesAreSorted_;
1422  }
1423 
1424  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1425  bool
1427  isMerged () const
1428  {
1429  return noRedundancies_;
1430  }
1431 
1432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1433  void
1436  {
1437  // FIXME (mfh 07 May 2013) How do we know that the change
1438  // introduced a redundancy, or even that it invalidated the sorted
1439  // order of indices? CrsGraph has always made this conservative
1440  // guess. It could be a bit costly to check at insertion time,
1441  // though.
1442  indicesAreSorted_ = false;
1443  noRedundancies_ = false;
1444 
1445  // We've modified the graph, so we'll have to recompute local
1446  // constants like the number of diagonal entries on this process.
1447  haveLocalConstants_ = false;
1448  }
1449 
1450  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1451  void
1453  allocateIndices (const ELocalGlobal lg)
1454  {
1455  using Teuchos::arcp;
1456  using Teuchos::Array;
1457  using Teuchos::ArrayRCP;
1458  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1459  typedef typename local_graph_type::row_map_type::non_const_type
1460  non_const_row_map_type;
1461  typedef typename local_graph_type::entries_type::non_const_type
1462  lcl_col_inds_type;
1463  typedef Kokkos::View<GlobalOrdinal*,
1464  typename lcl_col_inds_type::array_layout,
1465  device_type> gbl_col_inds_type;
1466  const char tfecfFuncName[] = "allocateIndices: ";
1467  const char suffix[] = " Please report this bug to the Tpetra developers.";
1468 
1469  // This is a protected function, only callable by us. If it was
1470  // called incorrectly, it is our fault. That's why the tests
1471  // below throw std::logic_error instead of std::invalid_argument.
1472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1473  (this->isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1474  "The graph is locally indexed, but Tpetra code is calling this method "
1475  "with lg=GlobalIndices." << suffix);
1476  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1477  (this->isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1478  "The graph is globally indexed, but Tpetra code is calling this method "
1479  "with lg=LocalIndices. " << suffix);
1480  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1481  (this->indicesAreAllocated (), std::logic_error, "The graph's indices "
1482  "are already allocated, but Tpetra is calling allocateIndices again."
1483  << suffix);
1484  const size_t numRows = this->getNodeNumRows ();
1485 
1486  if (this->getProfileType () == StaticProfile) {
1487  //
1488  // STATIC ALLOCATION PROFILE
1489  //
1490  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1491 
1492  if (this->k_numAllocPerRow_.extent (0) != 0) {
1493  // It's OK to throw std::invalid_argument here, because we
1494  // haven't incurred any side effects yet. Throwing that
1495  // exception (and not, say, std::logic_error) implies that the
1496  // instance can recover.
1497  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1498  (this->k_numAllocPerRow_.extent (0) != numRows,
1499  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1500  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1501  << ", but its length != numRows = " << numRows << ".");
1502 
1503  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1504  // we want to compute here) lives on device. That's OK;
1505  // computeOffsetsFromCounts can handle this case.
1507 
1508  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1509  // doesn't attempt to check its input for "invalid" flag
1510  // values. For now, we omit that feature of the sequential
1511  // code disabled below.
1512  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1513  }
1514  else {
1515  // It's OK to throw std::invalid_argument here, because we
1516  // haven't incurred any side effects yet. Throwing that
1517  // exception (and not, say, std::logic_error) implies that the
1518  // instance can recover.
1519  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1520  (this->numAllocForAllRows_ ==
1521  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1522  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1523  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1524  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1525 
1527  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1528  }
1529 
1530  // "Commit" the resulting row offsets.
1531  this->k_rowPtrs_ = k_rowPtrs;
1532 
1533  const size_type numInds = ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, numRows);
1534  // const size_type numInds = static_cast<size_type> (this->k_rowPtrs_(numRows));
1535  if (lg == LocalIndices) {
1536  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1537  }
1538  else {
1539  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1540  }
1541  storageStatus_ = ::Tpetra::Details::STORAGE_1D_UNPACKED;
1542  }
1543  else {
1544  //
1545  // DYNAMIC ALLOCATION PROFILE
1546  //
1547  const bool useNumAllocPerRow =
1548  (this->k_numAllocPerRow_.extent (0) != 0);
1549 
1550  if (lg == LocalIndices) {
1551  this->lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1552  for (size_t i = 0; i < numRows; ++i) {
1553  const size_t howMany = useNumAllocPerRow ?
1554  this->k_numAllocPerRow_(i) :
1555  this->numAllocForAllRows_;
1556  if (howMany > 0) {
1557  this->lclInds2D_[i].resize (howMany);
1558  }
1559  }
1560  }
1561  else { // allocate global indices
1562  this->gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1563  for (size_t i = 0; i < numRows; ++i) {
1564  const size_t howMany = useNumAllocPerRow ?
1565  this->k_numAllocPerRow_(i) :
1566  this->numAllocForAllRows_;
1567  if (howMany > 0) {
1568  this->gblInds2D_[i].resize (howMany);
1569  }
1570  }
1571  }
1572  this->storageStatus_ = ::Tpetra::Details::STORAGE_2D;
1573  }
1574 
1575  this->indicesAreLocal_ = (lg == LocalIndices);
1576  this->indicesAreGlobal_ = (lg == GlobalIndices);
1577 
1578  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1579  using Kokkos::ViewAllocateWithoutInitializing;
1580  typedef decltype (k_numRowEntries_) row_ent_type;
1581  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1582 
1583  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1584  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1585  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1586  }
1587 
1588  // Once indices are allocated, CrsGraph needs to free this information.
1589  this->numAllocForAllRows_ = 0;
1590  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1591  this->indicesAreAllocated_ = true;
1592 
1593  try {
1594  this->checkInternalState ();
1595  }
1596  catch (std::logic_error& e) {
1597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1598  (true, std::logic_error, "At end of allocateIndices, "
1599  "checkInternalState threw std::logic_error: "
1600  << e.what ());
1601  }
1602  catch (std::exception& e) {
1603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1604  (true, std::runtime_error, "At end of allocateIndices, "
1605  "checkInternalState threw std::exception: "
1606  << e.what ());
1607  }
1608  catch (...) {
1609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1610  (true, std::runtime_error, "At end of allocateIndices, "
1611  "checkInternalState threw an exception "
1612  "not a subclass of std::exception.");
1613  }
1614  }
1615 
1616  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1617  Teuchos::ArrayView<const LocalOrdinal>
1619  getLocalView (const RowInfo& rowinfo) const
1620  {
1621  using Kokkos::subview;
1622  typedef LocalOrdinal LO;
1623  typedef Kokkos::View<const LO*, execution_space,
1624  Kokkos::MemoryUnmanaged> row_view_type;
1625 
1626  if (rowinfo.allocSize == 0) {
1627  return Teuchos::ArrayView<const LO> ();
1628  }
1629  else { // nothing in the row to view
1630  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1631  const size_t start = rowinfo.offset1D;
1632  const size_t len = rowinfo.allocSize;
1633  const std::pair<size_t, size_t> rng (start, start + len);
1634  // mfh 23 Nov 2015: Don't just create a subview of
1635  // k_lclInds1D_ directly, because that first creates a
1636  // _managed_ subview, then returns an unmanaged version of
1637  // that. That touches the reference count, which costs
1638  // performance in a measurable way.
1639  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1640  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1641  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1642  }
1643  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1644  return lclInds2D_[rowinfo.localRow] ();
1645  }
1646  else {
1647  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1648  }
1649  }
1650  }
1651 
1652  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1653  LocalOrdinal
1655  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1656  LocalOrdinal& capacity,
1657  const RowInfo& rowInfo) const
1658  {
1659  lclInds = nullptr;
1660  capacity = 0;
1661 #ifdef HAVE_TPETRA_DEBUG
1662  constexpr bool debug = true;
1663 #else
1664  constexpr bool debug = false;
1665 #endif // HAVE_TPETRA_DEBUG
1666 
1667  if (rowInfo.allocSize != 0) {
1668  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1669  if (debug) {
1670  if (rowInfo.offset1D + rowInfo.allocSize >
1671  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1672  return static_cast<LocalOrdinal> (-1);
1673  }
1674  }
1675  lclInds = &k_lclInds1D_[rowInfo.offset1D];
1676  capacity = rowInfo.allocSize;
1677  }
1678  else { // 2-D storage
1679  if (debug) {
1680  if (rowInfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1681  return static_cast<LocalOrdinal> (-1);
1682  }
1683  }
1684  // Use a const reference so we don't touch the ArrayRCP's ref
1685  // count, since ArrayRCP's ref count is not thread safe.
1686  const auto& curRow = lclInds2D_[rowInfo.localRow];
1687  if (! curRow.empty ()) {
1688  lclInds = curRow.getRawPtr ();
1689  capacity = curRow.size ();
1690  }
1691  }
1692  }
1693  return static_cast<LocalOrdinal> (0);
1694  }
1695 
1696  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1697  Teuchos::ArrayView<LocalOrdinal>
1700  {
1701  using Kokkos::subview;
1702  typedef LocalOrdinal LO;
1703  typedef Kokkos::View<LO*, execution_space,
1704  Kokkos::MemoryUnmanaged> row_view_type;
1705 
1706  if (rowinfo.allocSize == 0) { // nothing in the row to view
1707  return Teuchos::ArrayView<LO> ();
1708  }
1709  else {
1710  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1711  const size_t start = rowinfo.offset1D;
1712  const size_t len = rowinfo.allocSize;
1713  const std::pair<size_t, size_t> rng (start, start + len);
1714  // mfh 23 Nov 2015: Don't just create a subview of
1715  // k_lclInds1D_ directly, because that first creates a
1716  // _managed_ subview, then returns an unmanaged version of
1717  // that. That touches the reference count, which costs
1718  // performance in a measurable way.
1719  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1720  LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1721  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1722  }
1723  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1724  return lclInds2D_[rowinfo.localRow] ();
1725  }
1726  else {
1727  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1728  }
1729  }
1730  }
1731 
1732 
1733  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1734  Kokkos::View<const LocalOrdinal*,
1736  Kokkos::MemoryUnmanaged>
1738  getLocalKokkosRowView (const RowInfo& rowInfo) const
1739  {
1740  typedef LocalOrdinal LO;
1741  typedef Kokkos::View<const LO*, execution_space,
1742  Kokkos::MemoryUnmanaged> row_view_type;
1743 
1744  if (rowInfo.allocSize == 0) {
1745  return row_view_type ();
1746  }
1747  else { // nothing in the row to view
1748  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1749  const size_t start = rowInfo.offset1D;
1750  const size_t len = rowInfo.allocSize;
1751  const std::pair<size_t, size_t> rng (start, start + len);
1752  // mfh 23 Nov 2015: Don't just create a subview of
1753  // k_lclInds1D_ directly, because that first creates a
1754  // _managed_ subview, then returns an unmanaged version of
1755  // that. That touches the reference count, which costs
1756  // performance in a measurable way.
1757  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1758  }
1759  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1760  // Use a reference, so that I don't touch the
1761  // Teuchos::ArrayView reference count in a debug build. (It
1762  // has no reference count in a release build.) This ensures
1763  // thread safety.
1764  //
1765  // lclInds2D_ lives on host, so this code does not assume UVM.
1766  Teuchos::Array<LO>& lclInds = this->lclInds2D_[rowInfo.localRow];
1767  return row_view_type (lclInds.getRawPtr (), lclInds.size ());
1768  }
1769  else {
1770  return row_view_type (); // nothing in the row to view
1771  }
1772  }
1773  }
1774 
1775 
1776  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1777  Kokkos::View<LocalOrdinal*,
1778  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1779  Kokkos::MemoryUnmanaged>
1780  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1781  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1782  {
1783  typedef LocalOrdinal LO;
1784  typedef Kokkos::View<LO*, execution_space,
1785  Kokkos::MemoryUnmanaged> row_view_type;
1786 
1787  if (rowInfo.allocSize == 0) {
1788  return row_view_type ();
1789  }
1790  else { // nothing in the row to view
1791  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1792  const size_t start = rowInfo.offset1D;
1793  const size_t len = rowInfo.allocSize;
1794  const std::pair<size_t, size_t> rng (start, start + len);
1795  // mfh 23 Nov 2015: Don't just create a subview of
1796  // k_lclInds1D_ directly, because that first creates a
1797  // _managed_ subview, then returns an unmanaged version of
1798  // that. That touches the reference count, which costs
1799  // performance in a measurable way.
1800  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1801  }
1802  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1803  // Use a reference, so that I don't touch the
1804  // Teuchos::ArrayView reference count in a debug build. (It
1805  // has no reference count in a release build.) This ensures
1806  // thread safety.
1807  //
1808  // lclInds2D_ lives on host, so this code does not assume UVM.
1809  Teuchos::Array<LO>& cols = this->lclInds2D_[rowInfo.localRow];
1810  LO* const colsRaw = cols.getRawPtr ();
1811  return row_view_type (colsRaw, cols.size ());
1812  }
1813  else {
1814  return row_view_type (); // nothing in the row to view
1815  }
1816  }
1817  }
1818 
1819 
1820  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1821  Kokkos::View<const GlobalOrdinal*,
1822  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1823  Kokkos::MemoryUnmanaged>
1824  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1825  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1826  {
1827  typedef GlobalOrdinal GO;
1828  typedef Kokkos::View<const GO*, execution_space,
1829  Kokkos::MemoryUnmanaged> row_view_type;
1830 
1831  if (rowinfo.allocSize == 0) {
1832  return row_view_type ();
1833  }
1834  else { // nothing in the row to view
1835  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1836  const size_t start = rowinfo.offset1D;
1837  const size_t len = rowinfo.allocSize;
1838  const std::pair<size_t, size_t> rng (start, start + len);
1839  // mfh 23 Nov 2015: Don't just create a subview of
1840  // k_gblInds1D_ directly, because that first creates a
1841  // _managed_ subview, then returns an unmanaged version of
1842  // that. That touches the reference count, which costs
1843  // performance in a measurable way.
1844  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1845  }
1846  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1847  // Use a reference, so that I don't touch the
1848  // Teuchos::ArrayView reference count in a debug build. (It
1849  // has no reference count in a release build.) This ensures
1850  // thread safety.
1851  //
1852  // gblInds2D_ lives on host, so this code does not assume UVM.
1853  Teuchos::Array<GO>& cols = this->gblInds2D_[rowinfo.localRow];
1854  return row_view_type (cols.getRawPtr (), cols.size ());
1855  }
1856  else {
1857  return row_view_type (); // nothing in the row to view
1858  }
1859  }
1860  }
1861 
1862 
1863  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1864  Teuchos::ArrayView<const GlobalOrdinal>
1866  getGlobalView (const RowInfo& rowinfo) const
1867  {
1868  Teuchos::ArrayView<const GlobalOrdinal> view;
1869  if (rowinfo.allocSize > 0) {
1870  if (k_gblInds1D_.extent (0) != 0) {
1871  auto rng = std::make_pair (rowinfo.offset1D,
1872  rowinfo.offset1D + rowinfo.allocSize);
1873  // mfh 23 Nov 2015: Don't just create a subview of
1874  // k_gblInds1D_ directly, because that first creates a
1875  // _managed_ subview, then returns an unmanaged version of
1876  // that. That touches the reference count, which costs
1877  // performance in a measurable way.
1878  Kokkos::View<const GlobalOrdinal*, execution_space,
1879  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1880  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1881  }
1882  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1883  view = gblInds2D_[rowinfo.localRow] ();
1884  }
1885  }
1886  return view;
1887  }
1888 
1889 
1890  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1891  LocalOrdinal
1893  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1894  LocalOrdinal& capacity,
1895  const RowInfo& rowInfo) const
1896  {
1897  gblInds = nullptr;
1898  capacity = 0;
1899 #ifdef HAVE_TPETRA_DEBUG
1900  constexpr bool debug = true;
1901 #else
1902  constexpr bool debug = false;
1903 #endif // HAVE_TPETRA_DEBUG
1904 
1905  if (rowInfo.allocSize != 0) {
1906  if (k_gblInds1D_.extent (0) != 0) { // 1-D storage
1907  if (debug) {
1908  if (rowInfo.offset1D + rowInfo.allocSize >
1909  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1910  return static_cast<LocalOrdinal> (-1);
1911  }
1912  }
1913  gblInds = &k_gblInds1D_[rowInfo.offset1D];
1914  capacity = rowInfo.allocSize;
1915  }
1916  else {
1917  if (debug) {
1918  if (rowInfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1919  return static_cast<LocalOrdinal> (-1);
1920  }
1921  }
1922  const auto& curRow = gblInds2D_[rowInfo.localRow];
1923  if (! curRow.empty ()) {
1924  gblInds = curRow.getRawPtr ();
1925  capacity = curRow.size ();
1926  }
1927  }
1928  }
1929  return static_cast<LocalOrdinal> (0);
1930  }
1931 
1932 
1933  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1934  Teuchos::ArrayView<GlobalOrdinal>
1937  {
1938  Teuchos::ArrayView<GlobalOrdinal> view;
1939  if (rowinfo.allocSize > 0) {
1940  if (k_gblInds1D_.extent (0) != 0) {
1941  auto rng = std::make_pair (rowinfo.offset1D,
1942  rowinfo.offset1D + rowinfo.allocSize);
1943  // mfh 23 Nov 2015: Don't just create a subview of
1944  // k_gblInds1D_ directly, because that first creates a
1945  // _managed_ subview, then returns an unmanaged version of
1946  // that. That touches the reference count, which costs
1947  // performance in a measurable way.
1948  Kokkos::View<GlobalOrdinal*, execution_space,
1949  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1950  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1951  }
1952  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1953  view = gblInds2D_[rowinfo.localRow] ();
1954  }
1955  }
1956  return view;
1957  }
1958 
1959 
1960  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1961  RowInfo
1963  getRowInfo (const LocalOrdinal myRow) const
1964  {
1965  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1966  RowInfo ret;
1967  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1968  ret.localRow = STINV;
1969  ret.allocSize = 0;
1970  ret.numEntries = 0;
1971  ret.offset1D = STINV;
1972  return ret;
1973  }
1974 
1975  ret.localRow = static_cast<size_t> (myRow);
1976  if (this->indicesAreAllocated ()) {
1977  if (this->getProfileType () == StaticProfile) {
1978  // Offsets tell us the allocation size in this case.
1979  if (this->k_rowPtrs_.extent (0) == 0) {
1980  ret.offset1D = 0;
1981  ret.allocSize = 0;
1982  }
1983  else {
1984  ret.offset1D = this->k_rowPtrs_(myRow);
1985  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1986  }
1987 
1988  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1989  ret.allocSize :
1990  this->k_numRowEntries_(myRow);
1991  }
1992  else { // DynamicProfile
1993  ret.offset1D = STINV;
1994  if (this->isLocallyIndexed ()) {
1995  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1996  size_t (0) :
1997  this->lclInds2D_[myRow].size ();
1998  }
1999  else if (this->isGloballyIndexed ()) {
2000  ret.allocSize = (this->gblInds2D_.size () == 0) ?
2001  size_t (0) :
2002  this->gblInds2D_[myRow].size ();
2003  }
2004  else { // neither locally nor globally indexed means no indices alloc'd
2005  ret.allocSize = 0;
2006  }
2007 
2008  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2009  size_t (0) :
2010  this->k_numRowEntries_(myRow);
2011  }
2012  }
2013  else { // haven't performed allocation yet; probably won't hit this code
2014  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
2015  // allocate, rather than doing lazy allocation at first insert.
2016  // This will make k_numAllocPerRow_ obsolete.
2017  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
2018  this->k_numAllocPerRow_(myRow) : // this is a host View
2019  this->numAllocForAllRows_;
2020  ret.numEntries = 0;
2021  ret.offset1D = STINV;
2022  }
2023 
2024  return ret;
2025  }
2026 
2027 
2028  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2029  RowInfo
2031  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
2032  {
2033  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
2034  RowInfo ret;
2035  if (this->rowMap_.is_null ()) {
2036  ret.localRow = STINV;
2037  ret.allocSize = 0;
2038  ret.numEntries = 0;
2039  ret.offset1D = STINV;
2040  return ret;
2041  }
2042  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
2043  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2044  ret.localRow = STINV;
2045  ret.allocSize = 0;
2046  ret.numEntries = 0;
2047  ret.offset1D = STINV;
2048  return ret;
2049  }
2050 
2051  ret.localRow = static_cast<size_t> (myRow);
2052  if (this->indicesAreAllocated ()) {
2053  // graph data structures have the info that we need
2054  //
2055  // if static graph, offsets tell us the allocation size
2056  if (this->getProfileType() == StaticProfile) {
2057  if (this->k_rowPtrs_.extent (0) == 0) {
2058  ret.offset1D = 0;
2059  ret.allocSize = 0;
2060  }
2061  else {
2062  ret.offset1D = this->k_rowPtrs_(myRow);
2063  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
2064  }
2065 
2066  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2067  ret.allocSize :
2068  this->k_numRowEntries_(myRow);
2069  }
2070  else { // DynamicProfile
2071  ret.offset1D = STINV;
2072  if (this->isLocallyIndexed ()) {
2073  ret.allocSize = (this->lclInds2D_.size () == 0) ?
2074  size_t (0) :
2075  this->lclInds2D_[myRow].size ();
2076  }
2077  else {
2078  ret.allocSize = (this->gblInds2D_.size () == 0) ?
2079  size_t (0) :
2080  this->gblInds2D_[myRow].size ();
2081  }
2082 
2083  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2084  size_t (0) :
2085  this->k_numRowEntries_(myRow);
2086  }
2087  }
2088  else { // haven't performed allocation yet; probably won't hit this code
2089  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
2090  // allocate, rather than doing lazy allocation at first insert.
2091  // This will make k_numAllocPerRow_ obsolete.
2092  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
2093  this->k_numAllocPerRow_(myRow) : // this is a host View
2094  this->numAllocForAllRows_;
2095  ret.numEntries = 0;
2096  ret.offset1D = STINV;
2097  }
2098 
2099  return ret;
2100  }
2101 
2102 
2103  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2104  void
2106  staticAssertions () const
2107  {
2108  using Teuchos::OrdinalTraits;
2109  typedef LocalOrdinal LO;
2110  typedef GlobalOrdinal GO;
2111  typedef global_size_t GST;
2112 
2113  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
2114  // This is so that we can store local indices in the memory
2115  // formerly occupied by global indices.
2116  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
2117  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
2118  // Assumption: max(size_t) >= max(LocalOrdinal)
2119  // This is so that we can represent any LocalOrdinal as a size_t.
2120  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
2121  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
2122  static_assert (sizeof(GST) >= sizeof(size_t),
2123  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
2124 
2125  // FIXME (mfh 30 Sep 2015) We're not using
2126  // Teuchos::CompileTimeAssert any more. Can we do these checks
2127  // with static_assert?
2128 
2129  // can't call max() with CompileTimeAssert, because it isn't a
2130  // constant expression; will need to make this a runtime check
2131  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
2132  "given template arguments: size assumptions are not valid.";
2133  TEUCHOS_TEST_FOR_EXCEPTION(
2134  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
2135  std::runtime_error, msg);
2136  TEUCHOS_TEST_FOR_EXCEPTION(
2137  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
2138  std::runtime_error, msg);
2139  TEUCHOS_TEST_FOR_EXCEPTION(
2140  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
2141  std::runtime_error, msg);
2142  TEUCHOS_TEST_FOR_EXCEPTION(
2143  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
2144  std::runtime_error, msg);
2145  }
2146 
2147 
2148  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2149  size_t
2152  const SLocalGlobalViews &newInds,
2153  const ELocalGlobal lg,
2154  const ELocalGlobal I)
2155  {
2156  using Teuchos::ArrayView;
2157  typedef LocalOrdinal LO;
2158  typedef GlobalOrdinal GO;
2159  const char tfecfFuncName[] = "insertIndices: ";
2160 #ifdef HAVE_TPETRA_DEBUG
2161  constexpr bool debug = true;
2162 #else
2163  constexpr bool debug = false;
2164 #endif // HAVE_TPETRA_DEBUG
2165 
2166  size_t oldNumEnt = 0;
2167  if (debug) {
2168  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2169  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
2170  "lg must be either GlobalIndices or LocalIndices.");
2171  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
2172  }
2173 
2174  size_t numNewInds = 0;
2175  if (lg == GlobalIndices) { // input indices are global
2176  ArrayView<const GO> new_ginds = newInds.ginds;
2177  numNewInds = new_ginds.size();
2178  if (I == GlobalIndices) { // store global indices
2179  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
2180  if (debug) {
2181  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2182  (static_cast<size_t> (gind_view.size ()) <
2183  rowinfo.numEntries + numNewInds, std::logic_error,
2184  "gind_view.size() = " << gind_view.size ()
2185  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2186  << ") + numNewInds (= " << numNewInds << ").");
2187  }
2188  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
2189  for (size_t k = 0; k < numNewInds; ++k) {
2190  gblColInds_out[k] = new_ginds[k];
2191  }
2192  }
2193  else if (I == LocalIndices) { // store local indices
2194  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2195  if (debug) {
2196  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2197  (static_cast<size_t> (lind_view.size ()) <
2198  rowinfo.numEntries + numNewInds, std::logic_error,
2199  "lind_view.size() = " << lind_view.size ()
2200  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2201  << ") + numNewInds (= " << numNewInds << ").");
2202  }
2203  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2204  for (size_t k = 0; k < numNewInds; ++k) {
2205  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
2206  }
2207  }
2208  }
2209  else if (lg == LocalIndices) { // input indices are local
2210  ArrayView<const LO> new_linds = newInds.linds;
2211  numNewInds = new_linds.size();
2212  if (I == LocalIndices) { // store local indices
2213  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2214  if (debug) {
2215  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2216  (static_cast<size_t> (lind_view.size ()) <
2217  rowinfo.numEntries + numNewInds, std::logic_error,
2218  "lind_view.size() = " << lind_view.size ()
2219  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2220  << ") + numNewInds (= " << numNewInds << ").");
2221  }
2222  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2223  for (size_t k = 0; k < numNewInds; ++k) {
2224  lclColInds_out[k] = new_linds[k];
2225  }
2226  }
2227  else if (I == GlobalIndices) {
2228  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2229  (true, std::logic_error, "The case where the input indices are local "
2230  "and the indices to write are global (lg=LocalIndices, I="
2231  "GlobalIndices) is not implemented, because it does not make sense."
2232  << std::endl << "If you have correct local column indices, that "
2233  "means the graph has a column Map. In that case, you should be "
2234  "storing local indices.");
2235  }
2236  }
2237 
2238  rowinfo.numEntries += numNewInds;
2239  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
2240  this->setLocallyModified ();
2241 
2242  if (debug) {
2243  const size_t chkNewNumEnt =
2244  this->getNumEntriesInLocalRow (rowinfo.localRow);
2245  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2246  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
2247  "chkNewNumEnt = " << chkNewNumEnt
2248  << " != oldNumEnt (= " << oldNumEnt
2249  << ") + numNewInds (= " << numNewInds << ").");
2250  }
2251 
2252  return numNewInds;
2253  }
2254 
2255  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2256  size_t
2258  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
2259  const GlobalOrdinal inputGblColInds[],
2260  const size_t numInputInds)
2261  {
2262  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
2263  inputGblColInds, numInputInds);
2264  }
2265 
2266  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2267  size_t
2270  const GlobalOrdinal inputGblColInds[],
2271  const size_t numInputInds,
2272  std::function<void(const size_t, const size_t, const size_t)> fun)
2273  {
2274  using Kokkos::View;
2275  using Kokkos::subview;
2276  using Kokkos::MemoryUnmanaged;
2277  using LO = LocalOrdinal;
2278  using GO = GlobalOrdinal;
2279  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
2280 #ifdef HAVE_TPETRA_DEBUG
2281  constexpr bool debug = true;
2282 #else
2283  constexpr bool debug = false;
2284 #endif // HAVE_TPETRA_DEBUG
2285 
2286  const LO lclRow = static_cast<LO> (rowInfo.localRow);
2287 
2288  if (this->getProfileType () == StaticProfile) {
2289  auto numEntries = rowInfo.numEntries;
2290  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2291  inp_view_type inputInds(inputGblColInds, numInputInds);
2292  size_t numInserted = Details::insertCrsIndices(lclRow, k_rowPtrs_,
2293  this->k_gblInds1D_, numEntries, inputInds, fun);
2294  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2295  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2296  std::runtime_error,
2297  "There is not enough capacity to insert indices in to row " << lclRow <<
2298  ". The upper bound on the number of entries in this row must be increased to "
2299  "accommodate one or more of the new indices.");
2300  this->k_numRowEntries_(lclRow) += numInserted;
2301  this->setLocallyModified();
2302  return numInserted;
2303  }
2304  else {
2305  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2306  size_t newNumEntries = rowInfo.numEntries + numInputInds; // preliminary
2307  if (newNumEntries > rowInfo.allocSize) {
2308  // update allocation, doubling size to reduce # reallocations
2309  size_t newAllocSize = 2*rowInfo.allocSize;
2310  if (newAllocSize < newNumEntries) {
2311  newAllocSize = newNumEntries;
2312  }
2313  this->gblInds2D_[lclRow].resize (newAllocSize);
2314  } // newNumEntries > rowInfo.allocSize
2315 
2316  // Copy new indices at end of global index array
2317  GO* const whereToPutGblColInds =
2318  this->gblInds2D_[lclRow].getRawPtr () + rowInfo.numEntries;
2319  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2320  whereToPutGblColInds[k_new] = inputGblColInds[k_new];
2321  }
2322  this->k_numRowEntries_(lclRow) += numInputInds;
2323  this->setLocallyModified ();
2324 
2325  if (debug) {
2326  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (lclRow);
2327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2328  (chkNewNumEntries != newNumEntries, std::logic_error,
2329  "getNumEntriesInLocalRow(lclRow=" << lclRow << ") = "
2330  << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2331  << ". Please report this bug to the Tpetra developers.");
2332  }
2333  return numInputInds;
2334  }
2335  }
2336 
2337 
2338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2339  void
2341  insertLocalIndicesImpl (const LocalOrdinal myRow,
2342  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2343  std::function<void(const size_t, const size_t, const size_t)> fun)
2344  {
2345  using Kokkos::MemoryUnmanaged;
2346  using Kokkos::subview;
2347  using Kokkos::View;
2348  using LO = LocalOrdinal;
2349  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
2350 
2351  const RowInfo rowInfo = this->getRowInfo(myRow);
2352 
2353  size_t numNewInds = 0;
2354  size_t newNumEntries = 0;
2355 
2356  if (this->getProfileType () == StaticProfile) {
2357  auto numEntries = rowInfo.numEntries;
2358  // Note: Teuchos::ArrayViews are in HostSpace
2359  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
2360  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2361  auto numInserted = Details::insertCrsIndices(myRow, k_rowPtrs_,
2362  this->k_lclInds1D_, numEntries, inputInds, fun);
2363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2364  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2365  std::runtime_error,
2366  "There is not enough capacity to insert indices in to row " << myRow <<
2367  ". The upper bound on the number of entries in this row must be increased to "
2368  "accommodate one or more of the new indices.");
2369  numNewInds = numInserted;
2370  newNumEntries = rowInfo.numEntries + numNewInds;
2371  }
2372  else {
2373  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2374  numNewInds = indices.size();
2375  newNumEntries = rowInfo.numEntries + numNewInds;
2376  if (newNumEntries > rowInfo.allocSize) {
2377  // update allocation, doubling size to reduce number of reallocations
2378  size_t newAllocSize = 2*rowInfo.allocSize;
2379  if (newAllocSize < newNumEntries) {
2380  newAllocSize = newNumEntries;
2381  }
2382  this->lclInds2D_[myRow].resize(newAllocSize);
2383  }
2384  std::copy (indices.begin (), indices.end (),
2385  this->lclInds2D_[myRow].begin () + rowInfo.numEntries);
2386  }
2387 
2388  this->k_numRowEntries_(myRow) += numNewInds;
2389  this->setLocallyModified ();
2390 
2391 #ifdef HAVE_TPETRA_DEBUG
2392  constexpr bool debug = true;
2393 #else
2394  constexpr bool debug = false;
2395 #endif // HAVE_TPETRA_DEBUG
2396 
2397  if (debug) {
2398  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
2399  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2400  (chkNewNumEntries != newNumEntries, std::logic_error,
2401  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
2402  << " != newNumEntries = " << newNumEntries
2403  << ". Please report this bug to the Tpetra developers.");
2404  }
2405  }
2406 
2407 
2408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2409  size_t
2412  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2413  std::function<void(const size_t, const size_t, const size_t)> fun) const
2414  {
2415 #ifdef HAVE_TPETRA_DEBUG
2416  const char tfecfFuncName[] = "findLocalIndices: ";
2417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2418  (this->getProfileType() != StaticProfile, std::runtime_error,
2419  "findLocalIndices requires that the graph have StaticProfile.");
2420 #endif // HAVE_TPETRA_DEBUG
2421  using LO = LocalOrdinal;
2422  using inp_view_type = Kokkos::View<const LO*, Kokkos::HostSpace,
2423  Kokkos::MemoryUnmanaged>;
2424  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2425 
2426  size_t numFound = 0;
2427  LO lclRow = rowInfo.localRow;
2428  if (this->isLocallyIndexed())
2429  {
2430  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2431  this->k_lclInds1D_, inputInds, fun);
2432  }
2433  else if (this->isGloballyIndexed())
2434  {
2435  if (this->colMap_.is_null())
2436  return Teuchos::OrdinalTraits<size_t>::invalid();
2437  const auto& colMap = *(this->colMap_);
2438  auto map = [&](LO const lclInd){return colMap.getGlobalElement(lclInd);};
2439  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2440  this->k_gblInds1D_, inputInds, map, fun);
2441  }
2442  return numFound;
2443  }
2444 
2445 
2446  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2447  size_t
2449  findGlobalIndices(const RowInfo& rowInfo,
2450  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2451  std::function<void(const size_t, const size_t, const size_t)> fun) const
2452  {
2453  const char tfecfFuncName[] = "findGlobalIndices: ";
2454  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2455  this->getProfileType() != StaticProfile,
2456  std::runtime_error,
2457  "findLocalIndices requires the graph have StaticProfile");
2458 
2459  using GO = GlobalOrdinal;
2460  using Kokkos::View;
2461  using Kokkos::MemoryUnmanaged;
2462  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
2463 
2464  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2465  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2466 
2467  size_t numFound = 0;
2468  LocalOrdinal lclRow = rowInfo.localRow;
2469  if (this->isLocallyIndexed())
2470  {
2471  if (this->colMap_.is_null())
2472  return invalidCount;
2473  const auto& colMap = *(this->colMap_);
2474  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
2475  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2476  this->k_lclInds1D_, inputInds, map, fun);
2477  }
2478  else if (this->isGloballyIndexed())
2479  {
2480  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2481  this->k_gblInds1D_, inputInds, fun);
2482  }
2483  return numFound;
2484  }
2485 
2486 
// Sort and/or remove adjacent duplicates from the local column
// indices of one row.
//
// NOTE(review): listing lines 2489-2490 (presumably the class
// qualifier and the function name with its first parameter, which the
// body uses as `rowInfo`) are missing from this text -- confirm
// against the original file.
//
// sorted: if false, the row's first numEntries indices are sorted.
// merged: if false, adjacent duplicate indices are removed and the
//   row's stored entry count is updated.
//
// Returns the number of duplicates removed; zero if `merged` is true
// or if the row has no (or an invalid number of) entries.
2487  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2488  size_t
2491  const bool sorted,
2492  const bool merged)
2493  {
2494  const size_t origNumEnt = rowInfo.numEntries;
2495  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2496  origNumEnt != 0) {
2497  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2498 
2499  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2500  if (! sorted) {
2501  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2502  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2503  }
2504 
2505  if (! merged) {
2506  LocalOrdinal* const beg = lclColIndsRaw;
2507  LocalOrdinal* const end = beg + rowInfo.numEntries;
2508  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
// std::unique only removes _adjacent_ duplicates, so it removes all
// duplicates only if the range is sorted at this point.
2509  LocalOrdinal* const newend = std::unique (beg, end);
2510  const size_t newNumEnt = newend - beg;
2511 
2512  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2513  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2514  return origNumEnt - newNumEnt; // the number of duplicates in the row
2515  }
2516  else {
2517  return static_cast<size_t> (0); // assume no duplicates
2518  }
2519  }
2520  else {
2521  return static_cast<size_t> (0); // no entries in the row
2522  }
2523  }
2524 
2525 
2526  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2527  void
2529  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2530  const Teuchos::RCP<const map_type>& rangeMap)
2531  {
2532  // simple pointer comparison for equality
2533  if (domainMap_ != domainMap) {
2534  domainMap_ = domainMap;
2535  importer_ = Teuchos::null;
2536  }
2537  if (rangeMap_ != rangeMap) {
2538  rangeMap_ = rangeMap;
2539  exporter_ = Teuchos::null;
2540  }
2541  }
2542 
2543 
// Reset the cached global counts (number of entries, number of
// diagonals, maximum number of row entries) to the invalid
// global_size_t value, and mark the global constants as not computed.
//
// NOTE(review): listing lines 2546-2547 (presumably the class
// qualifier and the function name) are missing from this text --
// confirm against the original file.
2544  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2545  void
2548  {
2549  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2550  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2551  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2552  haveGlobalConstants_ = false;
2553  }
2554 
2555 
2556  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2557  void
2560  {
2561  const bool debug = ::Tpetra::Details::Behavior::debug ();
2562  if (debug) {
2563  const char tfecfFuncName[] = "checkInternalState: ";
2564  const char suffix[] = " Please report this bug to the Tpetra developers.";
2565 
2566  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2567  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2568  // check the internal state of this data structure
2569  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2570  // always remains in a valid state
2571 
2572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2573  (this->rowMap_.is_null (), std::logic_error,
2574  "Row Map is null." << suffix);
2575  // This may access the row Map, so we need to check first (above)
2576  // whether the row Map is null.
2577  const LocalOrdinal lclNumRows =
2578  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2579 
2580  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2581  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2582  "Graph cannot be both fill active and fill complete." << suffix);
2583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2584  (this->isFillComplete () &&
2585  (this->colMap_.is_null () ||
2586  this->rangeMap_.is_null () ||
2587  this->domainMap_.is_null ()),
2588  std::logic_error,
2589  "Graph is full complete, but at least one of {column, range, domain} "
2590  "Map is null." << suffix);
2591  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2592  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2593  std::logic_error, "Storage is optimized, but indices are not "
2594  "allocated, not even trivially." << suffix);
2595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2596  (this->indicesAreAllocated_ &&
2597  (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED ||
2598  this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) &&
2599  this->pftype_ != StaticProfile, std::logic_error,
2600  "Graph claims to have allocated indices and 1-D storage "
2601  "(either packed or unpacked), but also claims to be DynamicProfile.");
2602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2603  (this->indicesAreAllocated_ &&
2604  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2605  this->pftype_ == StaticProfile, std::logic_error,
2606  "Graph claims to have allocated indices and 2-D storage, "
2607  "but also claims to be StaticProfile.");
2608  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2609  (this->indicesAreAllocated_ &&
2610  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2611  this->isLocallyIndexed () &&
2612  static_cast<LocalOrdinal> (this->lclInds2D_.size ()) != lclNumRows,
2613  std::logic_error,
2614  "Graph claims to have allocated indices, be locally indexed, and have "
2615  "2-D storage, but lclInds2D_.size() = " << this->lclInds2D_.size ()
2616  << " != getNodeNumRows() = " << lclNumRows << ".");
2617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2618  (this->indicesAreAllocated_ &&
2619  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2620  this->isGloballyIndexed () &&
2621  static_cast<LocalOrdinal> (this->gblInds2D_.size ()) != lclNumRows,
2622  std::logic_error,
2623  "Graph claims to have allocated indices, be globally indexed, and have "
2624  "2-D storage, but gblInds2D_.size() = " << this->gblInds2D_.size ()
2625  << " != getNodeNumRows() = " << lclNumRows << ".");
2626 
2627  size_t nodeAllocSize = 0;
2628  try {
2629  nodeAllocSize = this->getNodeAllocationSize ();
2630  }
2631  catch (std::logic_error& e) {
2632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2633  (true, std::runtime_error, "getNodeAllocationSize threw "
2634  "std::logic_error: " << e.what ());
2635  }
2636  catch (std::exception& e) {
2637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2638  (true, std::runtime_error, "getNodeAllocationSize threw an "
2639  "std::exception: " << e.what ());
2640  }
2641  catch (...) {
2642  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2643  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2644  "not a subclass of std::exception.");
2645  }
2646 
2647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2648  (this->isStorageOptimized () &&
2649  nodeAllocSize != this->getNodeNumEntries (),
2650  std::logic_error, "Storage is optimized, but "
2651  "this->getNodeAllocationSize() = " << nodeAllocSize
2652  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2653  << "." << suffix);
2654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2655  (! this->haveGlobalConstants_ &&
2656  (this->globalNumEntries_ != GSTI ||
2657  this->globalMaxNumRowEntries_ != GSTI),
2658  std::logic_error, "Graph claims not to have global constants, but "
2659  "some of the global constants are not marked as invalid." << suffix);
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2661  (this->haveGlobalConstants_ &&
2662  (this->globalNumEntries_ == GSTI ||
2663  this->globalMaxNumRowEntries_ == GSTI),
2664  std::logic_error, "Graph claims to have global constants, but "
2665  "some of them are marked as invalid." << suffix);
2666  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2667  (this->haveGlobalConstants_ &&
2668  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2669  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2670  std::logic_error, "Graph claims to have global constants, and "
2671  "all of the values of the global constants are valid, but "
2672  "some of the local constants are greater than "
2673  "their corresponding global constants." << suffix);
2674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2675  (this->indicesAreAllocated () &&
2676  (this->numAllocForAllRows_ != 0 ||
2677  this->k_numAllocPerRow_.extent (0) != 0),
2678  std::logic_error, "The graph claims that its indices are allocated, but "
2679  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2680  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2681  "the graph is supposed to release its \"allocation specifications\" "
2682  "when it allocates its indices." << suffix);
2683  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2684  (this->isStorageOptimized () && this->pftype_ != StaticProfile,
2685  std::logic_error,
2686  "Storage is optimized, but graph is not StaticProfile." << suffix);
2687  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2688  (this->isGloballyIndexed () &&
2689  this->k_rowPtrs_.extent (0) != 0 &&
2690  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2691  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_gblInds1D_.extent (0))),
2692  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2693  "the graph is globally indexed, then "
2694  "k_rowPtrs_ must have N+1 rows, and "
2695  "k_rowPtrs_(N) must equal k_gblInds1D_.extent(0)." << suffix);
2696  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2697  (this->isLocallyIndexed () &&
2698  this->k_rowPtrs_.extent (0) != 0 &&
2699  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2700  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2701  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2702  "the graph is locally indexed, then "
2703  "k_rowPtrs_ must have N+1 rows, and "
2704  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2705 
2706  if (this->pftype_ != StaticProfile) {
2707  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2708  (this->indicesAreAllocated () &&
2709  this->getNodeNumRows () > 0 &&
2710  this->lclInds2D_.is_null () &&
2711  this->gblInds2D_.is_null (),
2712  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2713  "the calling process has nonzero rows, but 2-D column index storage "
2714  "(whether local or global) is not present." << suffix);
2715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2716  (this->indicesAreAllocated () &&
2717  this->getNodeNumRows () > 0 &&
2718  this->k_numRowEntries_.extent (0) == 0,
2719  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2720  "the calling process has nonzero rows, but k_numRowEntries_ is not "
2721  "present." << suffix);
2722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2723  (this->k_lclInds1D_.extent (0) != 0 ||
2724  this->k_gblInds1D_.extent (0) != 0,
2725  std::logic_error, "Graph has DynamicProfile, but "
2726  "1-D allocations are present." << suffix);
2727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2728  (this->k_rowPtrs_.extent (0) != 0,
2729  std::logic_error, "Graph has DynamicProfile, but "
2730  "row offsets are present." << suffix);
2731  }
2732  else if (this->pftype_ == StaticProfile) {
2733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2734  (this->indicesAreAllocated () &&
2735  nodeAllocSize > 0 &&
2736  this->k_lclInds1D_.extent (0) == 0 &&
2737  this->k_gblInds1D_.extent (0) == 0,
2738  std::logic_error, "Graph has StaticProfile and is allocated "
2739  "nonnontrivally, but 1-D allocations are not present." << suffix);
2740  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2741  (this->lclInds2D_ != Teuchos::null || this->gblInds2D_ != Teuchos::null,
2742  std::logic_error, "Graph has StaticProfile, but 2-D allocations are "
2743  "present." << suffix);
2744  }
2745 
2746  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2747  (! this->indicesAreAllocated () &&
2748  ((this->k_rowPtrs_.extent (0) != 0 ||
2749  this->k_numRowEntries_.extent (0) != 0) ||
2750  this->k_lclInds1D_.extent (0) != 0 ||
2751  this->lclInds2D_ != Teuchos::null ||
2752  this->k_gblInds1D_.extent (0) != 0 ||
2753  this->gblInds2D_ != Teuchos::null),
2754  std::logic_error, "If indices are not allocated, "
2755  "then none of the buffers should be." << suffix);
2756  // indices may be local or global only if they are allocated
2757  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2758  // indicesAreGlobal_)
2759  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2760  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2761  ! this->indicesAreAllocated_,
2762  std::logic_error, "Indices may be local or global only if they are "
2763  "allocated." << suffix);
2764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2765  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2766  std::logic_error, "Indices may not be both local and global." << suffix);
2767  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2768  (this->indicesAreLocal_ &&
2769  (this->k_gblInds1D_.extent (0) != 0 || ! this->gblInds2D_.is_null ()),
2770  std::logic_error, "Indices are local, but either "
2771  "k_gblInds1D_.extent(0) (= "
2772  << this->k_gblInds1D_.extent (0) << ") != 0, or "
2773  "gblInds2D_ is not null. In other words, if indices are local, "
2774  "then global allocations should not be present." << suffix);
2775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2776  (this->indicesAreGlobal_ &&
2777  (this->k_lclInds1D_.extent (0) != 0 ||
2778  ! this->lclInds2D_.is_null ()),
2779  std::logic_error, "Indices are global, but either "
2780  "k_lclInds1D_.extent(0) (= "
2781  << this->k_lclInds1D_.extent (0) << ") != 0, or "
2782  "lclInds2D_ is not null. In other words, if indices are global, "
2783  "then local allocations should not be present." << suffix);
2784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2785  (this->indicesAreLocal_ &&
2786  nodeAllocSize > 0 &&
2787  this->k_lclInds1D_.extent (0) == 0 &&
2788  this->getNodeNumRows () > 0 &&
2789  this->lclInds2D_.is_null (),
2790  std::logic_error, "Indices are local, getNodeAllocationSize() = "
2791  << nodeAllocSize << " > 0, k_lclInds1D_.extent(0) = 0, "
2792  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2793  "lclInds2D_ is null." << suffix);
2794  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2795  (this->indicesAreGlobal_ &&
2796  nodeAllocSize > 0 &&
2797  this->k_gblInds1D_.extent (0) == 0 &&
2798  this->getNodeNumRows () > 0 &&
2799  this->gblInds2D_.is_null (),
2800  std::logic_error, "Indices are global, getNodeAllocationSize() = "
2801  << nodeAllocSize << " > 0, k_gblInds1D_.extent(0) = 0, "
2802  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2803  "gblInds2D_ is null." << suffix);
2804  // check the actual allocations
2805  if (this->indicesAreAllocated () &&
2806  this->pftype_ == StaticProfile &&
2807  this->k_rowPtrs_.extent (0) != 0) {
2808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2809  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2810  this->getNodeNumRows () + 1,
2811  std::logic_error, "Graph is StaticProfile, indices are allocated, and "
2812  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2813  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2814  << (this->getNodeNumRows () + 1) << "." << suffix);
2815  const size_t actualNumAllocated =
2816  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2817  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2818  (this->isLocallyIndexed () &&
2819  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2820  std::logic_error, "Graph is StaticProfile and locally indexed, "
2821  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2822  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2823  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2825  (this->isGloballyIndexed () &&
2826  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2827  std::logic_error, "Graph is StaticProfile and globally indexed, "
2828  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2829  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2830  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2831  }
2832  }
2833  }
2834 
2835 
2836  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2837  size_t
2839  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2840  {
2841  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2842  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2843  return Teuchos::OrdinalTraits<size_t>::invalid ();
2844  }
2845  else {
2846  return rowInfo.numEntries;
2847  }
2848  }
2849 
2850 
2851  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2852  size_t
2854  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2855  {
2856  const RowInfo rowInfo = this->getRowInfo (localRow);
2857  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2858  return Teuchos::OrdinalTraits<size_t>::invalid ();
2859  }
2860  else {
2861  return rowInfo.numEntries;
2862  }
2863  }
2864 
2865 
2866  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2867  size_t
2869  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2870  {
2871  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2872  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2873  return Teuchos::OrdinalTraits<size_t>::invalid ();
2874  }
2875  else {
2876  return rowInfo.allocSize;
2877  }
2878  }
2879 
2880 
2881  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2882  size_t
2884  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2885  {
2886  const RowInfo rowInfo = this->getRowInfo (localRow);
2887  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2888  return Teuchos::OrdinalTraits<size_t>::invalid ();
2889  }
2890  else {
2891  return rowInfo.allocSize;
2892  }
2893  }
2894 
2895 
// Return the row offsets (k_rowPtrs_) as a host-side
// Teuchos::ArrayRCP<const size_t>.
//
// - If k_rowPtrs_ has zero length, an empty (default-constructed)
//   ArrayRCP is returned.
// - If the row offset type is size_t, the offsets are deep-copied into
//   a host mirror of k_rowPtrs_ and that mirror is wrapped with
//   Kokkos::Compat::persistingView.
// - Otherwise, the offsets are first converted into a freshly
//   allocated size_t View with ::Tpetra::Details::copyOffsets, then
//   mirrored to host and wrapped the same way.
//
// When ::Tpetra::Details::Behavior::debug() is true, intermediate
// results are checked and std::logic_error is thrown on inconsistency.
//
// NOTE(review): the declarator lines (class qualifier and member name)
// are missing from this listing; the name getNodeRowPtrs is taken from
// the 'prefix' string below -- confirm against the real header.
2896  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2897  Teuchos::ArrayRCP<const size_t>
2900  {
2901  using Kokkos::ViewAllocateWithoutInitializing;
2902  using Kokkos::create_mirror_view;
2903  using Teuchos::ArrayRCP;
2904  typedef typename local_graph_type::row_map_type row_map_type;
2905  typedef typename row_map_type::non_const_value_type row_offset_type;
2906  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2907  const char suffix[] = " Please report this bug to the Tpetra developers.";
2908  const bool debug = ::Tpetra::Details::Behavior::debug ();
2909 
2910  const size_t size = k_rowPtrs_.extent (0);
// Compile-time flag: true when no element-type conversion is needed.
2911  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2912 
// No row offsets at all: hand back an empty array.
2913  if (size == 0) {
2914  return ArrayRCP<const size_t> ();
2915  }
2916 
// Exactly one of these two is filled in below, depending on 'same'.
2917  ArrayRCP<const row_offset_type> ptr_rot;
2918  ArrayRCP<const size_t> ptr_st;
2919  if (same) { // size_t == row_offset_type
2920  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2921  // of create_mirror_view might actually be a new allocation.
2922  // This helps with debugging when there are two memory spaces.
2923  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2924  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2925  if (debug) {
2926  TEUCHOS_TEST_FOR_EXCEPTION
2927  (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2928  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2929  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2930  << k_rowPtrs_.extent (0) << ".");
2931  TEUCHOS_TEST_FOR_EXCEPTION
2932  (same && size != 0 && k_rowPtrs_.data () == nullptr, std::logic_error,
2933  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2934  << size << " != 0, but k_rowPtrs_.data() == nullptr." << suffix);
2935  TEUCHOS_TEST_FOR_EXCEPTION
2936  (same && size != 0 && ptr_h.data () == nullptr, std::logic_error,
2937  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2938  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2939  "== nullptr." << suffix);
2940  }
2941  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2942  }
2943  else { // size_t != row_offset_type
// Convert the offsets into a size_t View first, then bring that
// View to host.
2944  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2945  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2946  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2947  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2948  Kokkos::deep_copy (ptr_h, ptr_d);
2949  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2950  }
// Debug-only sanity check: the array for the branch taken above must
// be non-null, since size != 0 at this point.
2951  if (debug) {
2952  TEUCHOS_TEST_FOR_EXCEPTION
2953  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2954  prefix << "size_t == row_offset_type and size = " << size
2955  << " != 0, but ptr_rot is null." << suffix);
2956  TEUCHOS_TEST_FOR_EXCEPTION
2957  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2958  prefix << "size_t != row_offset_type and size = " << size
2959  << " != 0, but ptr_st is null." << suffix);
2960  }
2961 
2962  // If size_t == row_offset_type, return a persisting host view of
2963  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2964  ArrayRCP<const size_t> retval =
2965  Kokkos::Impl::if_c<same,
2966  ArrayRCP<const row_offset_type>,
2967  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2968  if (debug) {
2969  TEUCHOS_TEST_FOR_EXCEPTION
2970  (size != 0 && retval.is_null (), std::logic_error,
2971  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2972  }
2973  return retval;
2974  }
2975 
2976 
// Return the 1-D local column index storage (k_lclInds1D_) wrapped as a
// Teuchos::ArrayRCP<const LocalOrdinal> via
// Kokkos::Compat::persistingView.
//
// NOTE(review): the declarator lines (class qualifier and member name)
// are missing from this listing; presumably this is
// getNodePackedIndices() -- confirm against the real header.
2977  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2978  Teuchos::ArrayRCP<const LocalOrdinal>
2981  {
2982  return Kokkos::Compat::persistingView (k_lclInds1D_);
2983  }
2984 
2985 
2986  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2987  void
2989  getLocalRowCopy (LocalOrdinal localRow,
2990  const Teuchos::ArrayView<LocalOrdinal>&indices,
2991  size_t& numEntries) const
2992  {
2993  using Teuchos::ArrayView;
2994  typedef LocalOrdinal LO;
2995  typedef GlobalOrdinal GO;
2996  const char tfecfFuncName[] = "getLocalRowCopy: ";
2997 
2998  TEUCHOS_TEST_FOR_EXCEPTION(
2999  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
3000  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
3001  "does not have a column Map yet. That means we don't have local indices "
3002  "for columns yet, so it doesn't make sense to call this method. If the "
3003  "graph doesn't have a column Map yet, you should call fillComplete on "
3004  "it first.");
3005 
3006  // This does the right thing (reports an empty row) if the input
3007  // row is invalid.
3008  const RowInfo rowinfo = this->getRowInfo (localRow);
3009  // No side effects on error.
3010  const size_t theNumEntries = rowinfo.numEntries;
3011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3012  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
3013  "Specified storage (size==" << indices.size () << ") does not suffice "
3014  "to hold all " << theNumEntries << " entry/ies for this row.");
3015  numEntries = theNumEntries;
3016 
3017  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3018  if (isLocallyIndexed ()) {
3019  ArrayView<const LO> lview = getLocalView (rowinfo);
3020  for (size_t j = 0; j < theNumEntries; ++j) {
3021  indices[j] = lview[j];
3022  }
3023  }
3024  else if (isGloballyIndexed ()) {
3025  ArrayView<const GO> gview = getGlobalView (rowinfo);
3026  for (size_t j = 0; j < theNumEntries; ++j) {
3027  indices[j] = colMap_->getLocalElement (gview[j]);
3028  }
3029  }
3030  }
3031  }
3032 
3033 
3034  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3035  void
3037  getGlobalRowCopy (GlobalOrdinal globalRow,
3038  const Teuchos::ArrayView<GlobalOrdinal>& indices,
3039  size_t& numEntries) const
3040  {
3041  using Teuchos::ArrayView;
3042  const char tfecfFuncName[] = "getGlobalRowCopy: ";
3043 
3044  // This does the right thing (reports an empty row) if the input
3045  // row is invalid.
3046  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
3047  const size_t theNumEntries = rowinfo.numEntries;
3048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3049  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
3050  "Specified storage (size==" << indices.size () << ") does not suffice "
3051  "to hold all " << theNumEntries << " entry/ies for this row.");
3052  numEntries = theNumEntries; // first side effect
3053 
3054  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3055  if (isLocallyIndexed ()) {
3056  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
3057  for (size_t j = 0; j < theNumEntries; ++j) {
3058  indices[j] = colMap_->getGlobalElement (lview[j]);
3059  }
3060  }
3061  else if (isGloballyIndexed ()) {
3062  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
3063  for (size_t j = 0; j < theNumEntries; ++j) {
3064  indices[j] = gview[j];
3065  }
3066  }
3067  }
3068  }
3069 
3070 
3071  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3072  void
3074  getLocalRowView (const LocalOrdinal localRow,
3075  Teuchos::ArrayView<const LocalOrdinal>& indices) const
3076  {
3077  const char tfecfFuncName[] = "getLocalRowView: ";
3078 #ifdef HAVE_TPETRA_DEBUG
3079  constexpr bool debug = true;
3080 #else
3081  constexpr bool debug = false;
3082 #endif // HAVE_TPETRA_DEBUG
3083 
3084  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3085  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
3086  "currently stored as global indices, so we cannot return a view with "
3087  "local column indices, whether or not the graph has a column Map. If "
3088  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
3089 
3090  // This does the right thing (reports an empty row) if the input
3091  // row is invalid.
3092  const RowInfo rowInfo = getRowInfo (localRow);
3093  indices = Teuchos::null;
3094  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3095  rowInfo.numEntries > 0) {
3096  indices = this->getLocalView (rowInfo);
3097  // getLocalView returns a view of the _entire_ row, including
3098  // any extra space at the end (which 1-D unpacked storage
3099  // might have, for example). That's why we have to take a
3100  // subview of the returned view.
3101  indices = indices (0, rowInfo.numEntries);
3102  }
3103 
3104  if (debug) {
3105  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3106  (static_cast<size_t> (indices.size ()) !=
3107  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
3108  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
3109  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
3110  ". Please report this bug to the Tpetra developers.");
3111  }
3112  }
3113 
3114 
3115  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3116  void
3118  getGlobalRowView (const GlobalOrdinal globalRow,
3119  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
3120  {
3121  const char tfecfFuncName[] = "getGlobalRowView: ";
3122 #ifdef HAVE_TPETRA_DEBUG
3123  constexpr bool debug = true;
3124 #else
3125  constexpr bool debug = false;
3126 #endif // HAVE_TPETRA_DEBUG
3127 
3128  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3129  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
3130  "currently stored as local indices, so we cannot return a view with "
3131  "global column indices. Use getGlobalRowCopy() instead.");
3132 
3133  // This does the right thing (reports an empty row) if the input
3134  // row is invalid.
3135  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
3136  indices = Teuchos::null;
3137  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3138  rowInfo.numEntries > 0) {
3139  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
3140  }
3141 
3142  if (debug) {
3143  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3144  (static_cast<size_t> (indices.size ()) !=
3145  getNumEntriesInGlobalRow (globalRow),
3146  std::logic_error, "indices.size() = " << indices.size ()
3147  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
3148  << getNumEntriesInGlobalRow (globalRow)
3149  << ". Please report this bug to the Tpetra developers.");
3150  }
3151  }
3152 
3153 
3154  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3155  void
3157  insertLocalIndices (const LocalOrdinal localRow,
3158  const Teuchos::ArrayView<const LocalOrdinal>& indices)
3159  {
3160  const char tfecfFuncName[] = "insertLocalIndices: ";
3161 
3162  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3163  (! isFillActive (), std::runtime_error, "Fill must be active.");
3164  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3165  (isGloballyIndexed (), std::runtime_error,
3166  "Graph indices are global; use insertGlobalIndices().");
3167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3168  (! hasColMap (), std::runtime_error,
3169  "Cannot insert local indices without a column Map.");
3170  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3171  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
3172  "Local row index " << localRow << " is not in the row Map "
3173  "on the calling process.");
3174  if (! indicesAreAllocated ()) {
3175  allocateIndices (LocalIndices);
3176  }
3177 
3178 #ifdef HAVE_TPETRA_DEBUG
3179  constexpr bool debug = true;
3180 #else
3181  constexpr bool debug = false;
3182 #endif // HAVE_TPETRA_DEBUG
3183 
3184  if (debug) {
3185  // In debug mode, if the graph has a column Map, test whether any
3186  // of the given column indices are not in the column Map. Keep
3187  // track of the invalid column indices so we can tell the user
3188  // about them.
3189  if (hasColMap ()) {
3190  using Teuchos::Array;
3191  using Teuchos::toString;
3192  using std::endl;
3193  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
3194 
3195  const map_type& colMap = *colMap_;
3196  Array<LocalOrdinal> badColInds;
3197  bool allInColMap = true;
3198  for (size_type k = 0; k < indices.size (); ++k) {
3199  if (! colMap.isNodeLocalElement (indices[k])) {
3200  allInColMap = false;
3201  badColInds.push_back (indices[k]);
3202  }
3203  }
3204  if (! allInColMap) {
3205  std::ostringstream os;
3206  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
3207  "entries in owned row " << localRow << ", at the following column "
3208  "indices: " << toString (indices) << "." << endl;
3209  os << "Of those, the following indices are not in the column Map on "
3210  "this process: " << toString (badColInds) << "." << endl << "Since "
3211  "the graph has a column Map already, it is invalid to insert entries "
3212  "at those locations.";
3213  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
3214  }
3215  }
3216  }
3217 
3218  insertLocalIndicesImpl (localRow, indices);
3219 
3220  if (debug) {
3221  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3222  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
3223  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
3224  "! isLocallyIndexed() is true. Please report this bug to the "
3225  "Tpetra developers.");
3226  }
3227  }
3228 
3229  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3230  void
3232  insertLocalIndices (const LocalOrdinal localRow,
3233  const LocalOrdinal numEnt,
3234  const LocalOrdinal inds[])
3235  {
3236  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
3237  this->insertLocalIndices (localRow, indsT);
3238  }
3239 
3240 
3241  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3242  void
3244  insertGlobalIndices (const GlobalOrdinal gblRow,
3245  const LocalOrdinal numInputInds,
3246  const GlobalOrdinal inputGblColInds[])
3247  {
3248  typedef LocalOrdinal LO;
3249  const char tfecfFuncName[] = "insertGlobalIndices: ";
3250 #ifdef HAVE_TPETRA_DEBUG
3251  constexpr bool debug = true;
3252 #else
3253  constexpr bool debug = false;
3254 #endif // HAVE_TPETRA_DEBUG
3255 
3256  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3257  (this->isLocallyIndexed (), std::runtime_error,
3258  "graph indices are local; use insertLocalIndices().");
3259  // This can't really be satisfied for now, because if we are
3260  // fillComplete(), then we are local. In the future, this may
3261  // change. However, the rule that modification require active
3262  // fill will not change.
3263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3264  (! this->isFillActive (), std::runtime_error,
3265  "You are not allowed to call this method if fill is not active. "
3266  "If fillComplete has been called, you must first call resumeFill "
3267  "before you may insert indices.");
3268  if (! this->indicesAreAllocated ()) {
3269  this->allocateIndices (GlobalIndices);
3270  }
3271  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
3272  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3273  if (debug) {
3274  if (this->hasColMap ()) {
3275  using std::endl;
3276  const map_type& colMap = * (this->colMap_);
3277  // In a debug build, keep track of the nonowned ("bad") column
3278  // indices, so that we can display them in the exception
3279  // message. In a release build, just ditch the loop early if
3280  // we encounter a nonowned column index.
3281  std::vector<GlobalOrdinal> badColInds;
3282  bool allInColMap = true;
3283  for (LO k = 0; k < numInputInds; ++k) {
3284  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
3285  allInColMap = false;
3286  badColInds.push_back (inputGblColInds[k]);
3287  }
3288  }
3289  if (! allInColMap) {
3290  std::ostringstream os;
3291  os << "You attempted to insert entries in owned row " << gblRow
3292  << ", at the following column indices: [";
3293  for (LO k = 0; k < numInputInds; ++k) {
3294  os << inputGblColInds[k];
3295  if (k + static_cast<LO> (1) < numInputInds) {
3296  os << ",";
3297  }
3298  }
3299  os << "]." << endl << "Of those, the following indices are not in "
3300  "the column Map on this process: [";
3301  for (size_t k = 0; k < badColInds.size (); ++k) {
3302  os << badColInds[k];
3303  if (k + size_t (1) < badColInds.size ()) {
3304  os << ",";
3305  }
3306  }
3307  os << "]." << endl << "Since the matrix has a column Map already, "
3308  "it is invalid to insert entries at those locations.";
3309  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3310  (true, std::invalid_argument, os.str ());
3311  }
3312  }
3313  } // debug
3314  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
3315  }
3316  else { // a nonlocal row
3317  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
3318  numInputInds);
3319  }
3320  }
3321 
3322 
3323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3324  void
3326  insertGlobalIndices (const GlobalOrdinal gblRow,
3327  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
3328  {
3329  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
3330  inputGblColInds.getRawPtr ());
3331  }
3332 
3333 
3334  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3335  void
3337  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
3338  const GlobalOrdinal gblColInds[],
3339  const LocalOrdinal numGblColInds)
3340  {
3341  typedef LocalOrdinal LO;
3342  typedef GlobalOrdinal GO;
3343  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
3344 
3345  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3346  (this->isLocallyIndexed (), std::runtime_error,
3347  "Graph indices are local; use insertLocalIndices().");
3348  // This can't really be satisfied for now, because if we are
3349  // fillComplete(), then we are local. In the future, this may
3350  // change. However, the rule that modification require active
3351  // fill will not change.
3352  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3353  (! this->isFillActive (), std::runtime_error,
3354  "You are not allowed to call this method if fill is not active. "
3355  "If fillComplete has been called, you must first call resumeFill "
3356  "before you may insert indices.");
3357  if (! this->indicesAreAllocated ()) {
3358  this->allocateIndices (GlobalIndices);
3359  }
3360 
3361  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
3362  // If we have a column Map, use it to filter the entries.
3363  if (! this->colMap_.is_null ()) {
3364  const map_type& colMap = * (this->colMap_);
3365 
3366  LO curOffset = 0;
3367  while (curOffset < numGblColInds) {
3368  // Find a sequence of input indices that are in the column Map
3369  // on the calling process. Doing a sequence at a time,
3370  // instead of one at a time, amortizes some overhead.
3371  LO endOffset = curOffset;
3372  for ( ; endOffset < numGblColInds; ++endOffset) {
3373  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
3374  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3375  break; // first entry, in current sequence, not in the column Map
3376  }
3377  }
3378  // curOffset, endOffset: half-exclusive range of indices in
3379  // the column Map on the calling process. If endOffset ==
3380  // curOffset, the range is empty.
3381  const LO numIndInSeq = (endOffset - curOffset);
3382  if (numIndInSeq != 0) {
3383  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
3384  numIndInSeq);
3385  }
3386  // Invariant before this line: Either endOffset ==
3387  // numGblColInds, or gblColInds[endOffset] is not in the
3388  // column Map on the calling process.
3389  curOffset = endOffset + 1;
3390  }
3391  }
3392  else {
3393  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
3394  gblColInds_av.size ());
3395  }
3396  }
3397 
3398  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3399  void
3401  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
3402  const GlobalOrdinal gblColInds[],
3403  const LocalOrdinal numGblColInds)
3404  {
3405  // This creates the std::vector if it doesn't exist yet.
3406  // std::map's operator[] does a lookup each time, so it's better
3407  // to pull nonlocals_[grow] out of the loop.
3408  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
3409  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
3410  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
3411  // order to avoid duplicates. globalAssemble() sorts these
3412  // anyway.
3413  nonlocalRow.push_back (gblColInds[k]);
3414  }
3415  }
3416 
3417  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3418  void
3420  removeLocalIndices (LocalOrdinal lrow)
3421  {
3422  const char tfecfFuncName[] = "removeLocalIndices: ";
3423  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3424  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3425  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3426  isStorageOptimized (), std::runtime_error,
3427  "cannot remove indices after optimizeStorage() has been called.");
3428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3429  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3431  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3432  "Local row " << lrow << " is not in the row Map on the calling process.");
3433  if (! indicesAreAllocated ()) {
3434  allocateIndices (LocalIndices);
3435  }
3436 
3437  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3438  // all processes?
3439  clearGlobalConstants ();
3440 
3441  if (k_numRowEntries_.extent (0) != 0) {
3442  this->k_numRowEntries_(lrow) = 0;
3443  }
3444 #ifdef HAVE_TPETRA_DEBUG
3445  constexpr bool debug = true;
3446 #else
3447  constexpr bool debug = false;
3448 #endif // HAVE_TPETRA_DEBUG
3449 
3450  if (debug) {
3451  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3452  (getNumEntriesInLocalRow (lrow) != 0 ||
3453  ! indicesAreAllocated () ||
3454  ! isLocallyIndexed (), std::logic_error,
3455  "Violated stated post-conditions. Please contact Tpetra team.");
3456  }
3457  }
3458 
3459 
3460  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3461  void
3463  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3464  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3465  {
3466  const char tfecfFuncName[] = "setAllIndices: ";
3467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3468  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3469  "The graph must have a column Map before you may call this method.");
3470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3471  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3472  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3473  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3474  ".");
3475 
3476  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3477  // since the future model will be allocation at construction, not
3478  // lazy allocation on first insert.
3479  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3480  ((this->k_lclInds1D_.extent (0) != 0 || this->k_gblInds1D_.extent (0) != 0),
3481  std::runtime_error, "You may not call this method if 1-D data "
3482  "structures are already allocated.");
3483 
3484  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3485  (this->lclInds2D_ != Teuchos::null ||
3486  this->gblInds2D_ != Teuchos::null,
3487  std::runtime_error, "You may not call this method if 2-D data "
3488  "structures are already allocated.");
3489 
3490  indicesAreAllocated_ = true;
3491  indicesAreLocal_ = true;
3492  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3493  k_lclInds1D_ = columnIndices;
3494  k_rowPtrs_ = rowPointers;
3495  // Storage MUST be packed, since the interface doesn't give any
3496  // way to indicate any extra space at the end of each row.
3497  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
3498 
3499  // Build the local graph.
3500  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3501 
3502  // These normally get cleared out at the end of allocateIndices.
3503  // It makes sense to clear them out here, because at the end of
3504  // this method, the graph is allocated on the calling process.
3505  numAllocForAllRows_ = 0;
3506  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3507 
3508  checkInternalState ();
3509  }
3510 
3511 
3512  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3513  void
3515  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3516  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3517  {
3518  using Kokkos::View;
3519  typedef typename local_graph_type::row_map_type row_map_type;
3520  typedef typename row_map_type::array_layout layout_type;
3521  typedef typename row_map_type::non_const_value_type row_offset_type;
3522  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3523  Kokkos::MemoryUnmanaged> input_view_type;
3524  typedef typename row_map_type::non_const_type nc_row_map_type;
3525 
3526  const size_t size = static_cast<size_t> (rowPointers.size ());
3527  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3528  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3529 
3530  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3531 
3532  if (same) { // size_t == row_offset_type
3533  // This compile-time logic ensures that the compiler never sees
3534  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3535  // ...> unless size_t == row_offset_type.
3536  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3537  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3538  nc_row_map_type,
3539  input_view_type>::select (ptr_rot, ptr_decoy),
3540  ptr_in);
3541  }
3542  else { // size_t != row_offset_type
3543  // CudaUvmSpace != HostSpace, so this will be false in that case.
3544  constexpr bool inHostMemory =
3545  std::is_same<typename row_map_type::memory_space,
3546  Kokkos::HostSpace>::value;
3547  if (inHostMemory) {
3548  // Copy (with cast from size_t to row_offset_type, with bounds
3549  // checking if necessary) to ptr_rot.
3550  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3551  }
3552  else { // Copy input row offsets to device first.
3553  //
3554  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3555  // execution space would avoid the double copy.
3556  //
3557  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3558  Kokkos::deep_copy (ptr_st, ptr_in);
3559  // Copy on device (casting from size_t to row_offset_type,
3560  // with bounds checking if necessary) to ptr_rot. This
3561  // executes in the output View's execution space, which is the
3562  // same as execution_space.
3563  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3564  }
3565  }
3566 
3567  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3568  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3569  setAllIndices (ptr_rot, k_ind);
3570  }
3571 
3572 
3573  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3574  void
3576  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3577  size_t& boundForAllLocalRows,
3578  bool& boundSameForAllLocalRows) const
3579  {
3580  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3581  const char suffix[] = " Please report this bug to the Tpetra developers.";
3582 
3583  // The three output arguments. We assign them to the actual
3584  // output arguments at the end, in order to implement
3585  // transactional semantics.
3586  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3587  size_t numEntriesForAll = 0;
3588  bool allRowsSame = true;
3589 
3590  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3591 
3592  if (this->indicesAreAllocated ()) {
3593  if (this->isStorageOptimized ()) {
3594  // left with the case that we have optimized storage. in this
3595  // case, we have to construct a list of row sizes.
3596  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3597  (this->getProfileType () != StaticProfile, std::logic_error,
3598  "The graph is not StaticProfile, but storage appears to be optimized."
3599  << suffix);
3600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3601  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3602  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3603  << " on the calling process, but the k_rowPtrs_ array has zero entries."
3604  << suffix);
3605  Teuchos::ArrayRCP<size_t> numEnt;
3606  if (numRows != 0) {
3607  numEnt = Teuchos::arcp<size_t> (numRows);
3608  }
3609 
3610  // We have to iterate through the row offsets anyway, so we
3611  // might as well check whether all rows' bounds are the same.
3612  bool allRowsReallySame = false;
3613  for (ptrdiff_t i = 0; i < numRows; ++i) {
3614  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3615  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3616  allRowsReallySame = false;
3617  }
3618  }
3619  if (allRowsReallySame) {
3620  if (numRows == 0) {
3621  numEntriesForAll = 0;
3622  } else {
3623  numEntriesForAll = numEnt[1] - numEnt[0];
3624  }
3625  allRowsSame = true;
3626  }
3627  else {
3628  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3629  allRowsSame = false; // conservatively; we don't check the array
3630  }
3631  }
3632  else if (k_numRowEntries_.extent (0) != 0) {
3633  // This is a shallow copy; the ArrayRCP wraps the View in a
3634  // custom destructor, which ensures correct deallocation if
3635  // that is the only reference to the View. Furthermore, this
3636  // View is a host View, so this doesn't assume UVM.
3637  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3638  allRowsSame = false; // conservatively; we don't check the array
3639  }
3640  else {
3641  numEntriesForAll = 0;
3642  allRowsSame = true;
3643  }
3644  }
3645  else { // indices not allocated
3646  if (k_numAllocPerRow_.extent (0) != 0) {
3647  // This is a shallow copy; the ArrayRCP wraps the View in a
3648  // custom destructor, which ensures correct deallocation if
3649  // that is the only reference to the View. Furthermore, this
3650  // View is a host View, so this doesn't assume UVM.
3651  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3652  allRowsSame = false; // conservatively; we don't check the array
3653  }
3654  else {
3655  numEntriesForAll = numAllocForAllRows_;
3656  allRowsSame = true;
3657  }
3658  }
3659 
3660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3661  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3662  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3663  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3664  "size " << numEntriesPerRow.size () << "." << suffix);
3665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3666  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3667  "numEntriesForAll and allRowsSame are not consistent. The former "
3668  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3669  << suffix);
3670  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3671  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3672  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3673  "nonzero length " << numEntriesForAll << ", but the latter is true."
3674  << suffix);
3675 
3676  boundPerLocalRow = numEntriesPerRow;
3677  boundForAllLocalRows = numEntriesForAll;
3678  boundSameForAllLocalRows = allRowsSame;
3679  }
3680 
3681 
3682  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3683  void
3686  {
3687  using Teuchos::Comm;
3688  using Teuchos::outArg;
3689  using Teuchos::RCP;
3690  using Teuchos::rcp;
3691  using Teuchos::REDUCE_MAX;
3692  using Teuchos::REDUCE_MIN;
3693  using Teuchos::reduceAll;
3694  typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type;
3695  typedef LocalOrdinal LO;
3696  typedef GlobalOrdinal GO;
3697  typedef typename Teuchos::Array<GO>::size_type size_type;
3698  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3699 
3700  RCP<const Comm<int> > comm = getComm ();
3701 
3702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3703  (! isFillActive (), std::runtime_error, "Fill must be active before "
3704  "you may call this method.");
3705 
3706  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3707 
3708  // If no processes have nonlocal rows, then we don't have to do
3709  // anything. Checking this is probably cheaper than constructing
3710  // the Map of nonlocal rows (see below) and noticing that it has
3711  // zero global entries.
3712  {
3713  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3714  int someoneHasNonlocalRows = 0;
3715  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3716  outArg (someoneHasNonlocalRows));
3717  if (someoneHasNonlocalRows == 0) {
3718  return; // no process has nonlocal rows, so nothing to do
3719  }
3720  }
3721 
3722  // 1. Create a list of the "nonlocal" rows on each process. this
3723  // requires iterating over nonlocals_, so while we do this,
3724  // deduplicate the entries and get a count for each nonlocal
3725  // row on this process.
3726  // 2. Construct a new row Map corresponding to those rows. This
3727  // Map is likely overlapping. We know that the Map is not
3728  // empty on all processes, because the above all-reduce and
3729  // return exclude that case.
3730 
3731  RCP<const map_type> nonlocalRowMap;
3732  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3733  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3734  {
3735  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3736  size_type curPos = 0;
3737  for (auto mapIter = this->nonlocals_.begin ();
3738  mapIter != this->nonlocals_.end ();
3739  ++mapIter, ++curPos) {
3740  myNonlocalGblRows[curPos] = mapIter->first;
3741  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3742  std::sort (gblCols.begin (), gblCols.end ());
3743  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3744  gblCols.erase (vecLast, gblCols.end ());
3745  numEntPerNonlocalRow[curPos] = gblCols.size ();
3746  }
3747 
3748  // Currently, Map requires that its indexBase be the global min
3749  // of all its global indices. Map won't compute this for us, so
3750  // we must do it. If our process has no nonlocal rows, set the
3751  // "min" to the max possible GO value. This ensures that if
3752  // some process has at least one nonlocal row, then it will pick
3753  // that up as the min. We know that at least one process has a
3754  // nonlocal row, since the all-reduce and return at the top of
3755  // this method excluded that case.
3756  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3757  {
3758  auto iter = std::min_element (myNonlocalGblRows.begin (),
3759  myNonlocalGblRows.end ());
3760  if (iter != myNonlocalGblRows.end ()) {
3761  myMinNonlocalGblRow = *iter;
3762  }
3763  }
3764  GO gblMinNonlocalGblRow = 0;
3765  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3766  outArg (gblMinNonlocalGblRow));
3767  const GO indexBase = gblMinNonlocalGblRow;
3768  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3769  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3770  }
3771 
3772  // 3. Use the column indices for each nonlocal row, as stored in
3773  // nonlocals_, to construct a CrsGraph corresponding to
3774  // nonlocal rows. We may use StaticProfile, since we have
3775  // exact counts of the number of entries in each nonlocal row.
3776 
3777  RCP<crs_graph_type> nonlocalGraph =
3778  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow (),
3779  StaticProfile));
3780  {
3781  size_type curPos = 0;
3782  for (auto mapIter = this->nonlocals_.begin ();
3783  mapIter != this->nonlocals_.end ();
3784  ++mapIter, ++curPos) {
3785  const GO gblRow = mapIter->first;
3786  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3787  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3788  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3789  }
3790  }
3791  // There's no need to fill-complete the nonlocals graph.
3792  // We just use it as a temporary container for the Export.
3793 
3794  // 4. If the original row Map is one to one, then we can Export
3795  // directly from nonlocalGraph into this. Otherwise, we have
3796  // to create a temporary graph with a one-to-one row Map,
3797  // Export into that, then Import from the temporary graph into
3798  // *this.
3799 
3800  auto origRowMap = this->getRowMap ();
3801  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3802 
3803  if (origRowMapIsOneToOne) {
3804  export_type exportToOrig (nonlocalRowMap, origRowMap);
3805  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3806  // We're done at this point!
3807  }
3808  else {
3809  // If you ask a Map whether it is one to one, it does some
3810  // communication and stashes intermediate results for later use
3811  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3812  // much more then the original cost of calling isOneToOne.
3813  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3814  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3815 
3816  // Create a temporary graph with the one-to-one row Map.
3817  //
3818  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3819  // row, to avoid reallocation during the Export operation.
3820  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3821  // Export from graph of nonlocals into the temp one-to-one graph.
3822  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3823 
3824  // We don't need the graph of nonlocals anymore, so get rid of
3825  // it, to keep the memory high-water mark down.
3826  nonlocalGraph = Teuchos::null;
3827 
3828  // Import from the one-to-one graph to the original graph.
3829  import_type importToOrig (oneToOneRowMap, origRowMap);
3830  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3831  }
3832 
3833  // It's safe now to clear out nonlocals_, since we've already
3834  // committed side effects to *this. The standard idiom for
3835  // clearing a Container like std::map, is to swap it with an empty
3836  // Container and let the swapped Container fall out of scope.
3837  decltype (this->nonlocals_) newNonlocals;
3838  std::swap (this->nonlocals_, newNonlocals);
3839 
3840  checkInternalState ();
3841  }
3842 
3843 
3844  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3845  void
3847  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3848  {
3849  clearGlobalConstants();
3850  if (params != Teuchos::null) this->setParameterList (params);
3851  lowerTriangular_ = false;
3852  upperTriangular_ = false;
3853  // either still sorted/merged or initially sorted/merged
3854  indicesAreSorted_ = true;
3855  noRedundancies_ = true;
3856  fillComplete_ = false;
3857  }
3858 
3859 
3860  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3861  void
3863  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3864  {
3865  // If the graph already has domain and range Maps, don't clobber
3866  // them. If it doesn't, use the current row Map for both the
3867  // domain and range Maps.
3868  //
3869  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3870  // column Map, and column indices are inserted which are not in
3871  // the row Map on any process, this will cause troubles. However,
3872  // that is not a common case for most applications that we
3873  // encounter, and checking for it might require more
3874  // communication.
3875  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3876  if (domMap.is_null ()) {
3877  domMap = this->getRowMap ();
3878  }
3879  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3880  if (ranMap.is_null ()) {
3881  ranMap = this->getRowMap ();
3882  }
3883  this->fillComplete (domMap, ranMap, params);
3884  }
3885 
3886 
3887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3888  void
3890  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3891  const Teuchos::RCP<const map_type>& rangeMap,
3892  const Teuchos::RCP<Teuchos::ParameterList>& params)
3893  {
3894  const char tfecfFuncName[] = "fillComplete: ";
3895  const bool debug = ::Tpetra::Details::Behavior::debug ();
3896 
3897  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3898  (! isFillActive () || isFillComplete (), std::runtime_error,
3899  "Graph fill state must be active (isFillActive() "
3900  "must be true) before calling fillComplete().");
3901 
3902  const int numProcs = getComm ()->getSize ();
3903 
3904  //
3905  // Read and set parameters
3906  //
3907 
3908  // Does the caller want to sort remote GIDs (within those owned by
3909  // the same process) in makeColMap()?
3910  if (! params.is_null ()) {
3911  if (params->isParameter ("sort column map ghost gids")) {
3912  sortGhostsAssociatedWithEachProcessor_ =
3913  params->get<bool> ("sort column map ghost gids",
3914  sortGhostsAssociatedWithEachProcessor_);
3915  }
3916  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3917  sortGhostsAssociatedWithEachProcessor_ =
3918  params->get<bool> ("Sort column Map ghost GIDs",
3919  sortGhostsAssociatedWithEachProcessor_);
3920  }
3921  }
3922 
3923  // If true, the caller promises that no process did nonlocal
3924  // changes since the last call to fillComplete.
3925  bool assertNoNonlocalInserts = false;
3926  if (! params.is_null ()) {
3927  assertNoNonlocalInserts =
3928  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3929  }
3930 
3931  //
3932  // Allocate indices, if they haven't already been allocated
3933  //
3934  if (! indicesAreAllocated ()) {
3935  if (hasColMap ()) {
3936  // We have a column Map, so use local indices.
3937  allocateIndices (LocalIndices);
3938  } else {
3939  // We don't have a column Map, so use global indices.
3940  allocateIndices (GlobalIndices);
3941  }
3942  }
3943 
3944  //
3945  // Do global assembly, if requested and if the communicator
3946  // contains more than one process.
3947  //
3948  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3949  if (mayNeedGlobalAssemble) {
3950  // This first checks if we need to do global assembly.
3951  // The check costs a single all-reduce.
3952  globalAssemble ();
3953  }
3954  else {
3955  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3956  (numProcs > 1 && this->nonlocals_.size() > 0, std::runtime_error,
3957  "The graph's communicator contains only one process, "
3958  "but there are nonlocal entries. "
3959  "This probably means that invalid entries were added to the graph.");
3960  }
3961 
3962  // Set domain and range Map. This may clear the Import / Export
3963  // objects if the new Maps differ from any old ones.
3964  setDomainRangeMaps (domainMap, rangeMap);
3965 
3966  // If the graph does not already have a column Map (either from
3967  // the user constructor calling the version of the constructor
3968  // that takes a column Map, or from a previous fillComplete call),
3969  // then create it.
3970  Teuchos::Array<int> remotePIDs (0);
3971  const bool mustBuildColMap = ! this->hasColMap ();
3972  if (mustBuildColMap) {
3973  this->makeColMap (remotePIDs); // resized on output
3974  }
3975 
3976  // Make indices local, if they aren't already.
3977  // The method doesn't do any work if the indices are already local.
3978  const std::pair<size_t, std::string> makeIndicesLocalResult =
3979  this->makeIndicesLocal ();
3980  if (debug) { // In debug mode, print error output on all processes
3981  using ::Tpetra::Details::gathervPrint;
3982  using Teuchos::RCP;
3983  using Teuchos::REDUCE_MIN;
3984  using Teuchos::reduceAll;
3985  using Teuchos::outArg;
3986 
3987  RCP<const map_type> map = this->getMap ();
3988  RCP<const Teuchos::Comm<int> > comm;
3989  if (! map.is_null ()) {
3990  comm = map->getComm ();
3991  }
3992  if (comm.is_null ()) {
3993  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3994  (makeIndicesLocalResult.first != 0, std::runtime_error,
3995  makeIndicesLocalResult.second);
3996  }
3997  else {
3998  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3999  int gblSuccess = 0; // output argument
4000  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
4001  if (gblSuccess != 1) {
4002  std::ostringstream os;
4003  gathervPrint (os, makeIndicesLocalResult.second, *comm);
4004  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4005  (true, std::runtime_error, os.str ());
4006  }
4007  }
4008  }
4009  else {
4010  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
4011  // the error state to makeImportExport or
4012  // computeGlobalConstants, which may do all-reduces and thus may
4013  // have the opportunity to communicate that error state.
4014  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4015  (makeIndicesLocalResult.first != 0, std::runtime_error,
4016  makeIndicesLocalResult.second);
4017  }
4018 
4019  // If this process has no indices, then CrsGraph considers it
4020  // already trivially sorted and merged. Thus, this method need
4021  // not be called on all processes in the row Map's communicator.
4022  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
4023 
4024  // Make Import and Export objects, if they haven't been made
4025  // already. If we made a column Map above, reuse information from
4026  // that process to avoid communiation in the Import setup.
4027  this->makeImportExport (remotePIDs, mustBuildColMap);
4028 
4029  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
4030  this->fillLocalGraph (params);
4031 
4032  const bool callComputeGlobalConstants = params.get () == nullptr ||
4033  params->get ("compute global constants", true);
4034  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4035  params->get ("compute local triangular constants", true);
4036  if (callComputeGlobalConstants) {
4037  this->computeGlobalConstants (computeLocalTriangularConstants);
4038  }
4039  else {
4040  this->computeLocalConstants (computeLocalTriangularConstants);
4041  }
4042  this->fillComplete_ = true;
4043  this->checkInternalState ();
4044  }
4045 
4046 
4047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4048  void
4050  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
4051  const Teuchos::RCP<const map_type>& rangeMap,
4052  const Teuchos::RCP<const import_type>& importer,
4053  const Teuchos::RCP<const export_type>& exporter,
4054  const Teuchos::RCP<Teuchos::ParameterList>& params)
4055  {
4056  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4057 #ifdef HAVE_TPETRA_MMM_TIMINGS
4058  std::string label;
4059  if(!params.is_null())
4060  label = params->get("Timer Label",label);
4061  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4062  using Teuchos::TimeMonitor;
4063  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
4064 #endif
4065 
4066 
4067  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4068  domainMap.is_null () || rangeMap.is_null (),
4069  std::runtime_error, "The input domain Map and range Map must be nonnull.");
4070  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4071  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
4072  "method unless the graph is StaticProfile.");
4073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4074  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
4075  "call this method unless the graph has a column Map.");
4076  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4077  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
4078  std::runtime_error, "The calling process has getNodeNumRows() = "
4079  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
4080  "been set.");
4081  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4082  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
4083  std::runtime_error, "The row offsets array has length " <<
4084  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
4085  (getNodeNumRows () + 1) << ".");
4086 
4087  // Note: We don't need to do the following things which are normally done in fillComplete:
4088  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
4089 
4090  // Constants from allocateIndices
4091  //
4092  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
4093  // away once the graph is allocated. expertStaticFillComplete
4094  // either presumes that the graph is allocated, or "allocates" it.
4095  //
4096  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
4097  // version of CrsGraph is to allocate in the constructor, not
4098  // lazily on first insert. That will make both
4099  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
4100  numAllocForAllRows_ = 0;
4101  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
4102  indicesAreAllocated_ = true;
4103 
4104  // Constants from makeIndicesLocal
4105  //
4106  // The graph has a column Map, so its indices had better be local.
4107  indicesAreLocal_ = true;
4108  indicesAreGlobal_ = false;
4109 
4110  // set domain/range map: may clear the import/export objects
4111 #ifdef HAVE_TPETRA_MMM_TIMINGS
4112  MM = Teuchos::null;
4113  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
4114 #endif
4115  setDomainRangeMaps (domainMap, rangeMap);
4116 
4117  // Presume the user sorted and merged the arrays first
4118  indicesAreSorted_ = true;
4119  noRedundancies_ = true;
4120 
4121  // makeImportExport won't create a new importer/exporter if I set one here first.
4122 #ifdef HAVE_TPETRA_MMM_TIMINGS
4123  MM = Teuchos::null;
4124  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
4125 #endif
4126 
4127  importer_ = Teuchos::null;
4128  exporter_ = Teuchos::null;
4129  if (importer != Teuchos::null) {
4130  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4131  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
4132  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
4133  std::invalid_argument,": importer does not match matrix maps.");
4134  importer_ = importer;
4135 
4136  }
4137 
4138 #ifdef HAVE_TPETRA_MMM_TIMINGS
4139  MM = Teuchos::null;
4140  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
4141 #endif
4142 
4143  if (exporter != Teuchos::null) {
4144  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4145  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
4146  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
4147  std::invalid_argument,": exporter does not match matrix maps.");
4148  exporter_ = exporter;
4149  }
4150 
4151 #ifdef HAVE_TPETRA_MMM_TIMINGS
4152  MM = Teuchos::null;
4153  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
4154 #endif
4155  Teuchos::Array<int> remotePIDs (0); // unused output argument
4156  this->makeImportExport (remotePIDs, false);
4157 
4158  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
4159 #ifdef HAVE_TPETRA_MMM_TIMINGS
4160  MM = Teuchos::null;
4161  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
4162 #endif
4163  this->fillLocalGraph (params);
4164 
4165  const bool callComputeGlobalConstants = params.get () == nullptr ||
4166  params->get ("compute global constants", true);
4167  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4168  params->get ("compute local triangular constants", true);
4169 
4170  if (callComputeGlobalConstants) {
4171 #ifdef HAVE_TPETRA_MMM_TIMINGS
4172  MM = Teuchos::null;
4173  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
4174 #endif // HAVE_TPETRA_MMM_TIMINGS
4175  this->computeGlobalConstants (computeLocalTriangularConstants);
4176  }
4177  else {
4178 #ifdef HAVE_TPETRA_MMM_TIMINGS
4179  MM = Teuchos::null;
4180  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
4181 #endif // HAVE_TPETRA_MMM_TIMINGS
4182  this->computeLocalConstants (computeLocalTriangularConstants);
4183  }
4184 
4185  fillComplete_ = true;
4186 
4187 #ifdef HAVE_TPETRA_MMM_TIMINGS
4188  MM = Teuchos::null;
4189  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
4190 #endif
4191  checkInternalState ();
4192  }
4193 
4194 
4195  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4196  void
4198  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
4199  {
4201  typedef decltype (k_numRowEntries_) row_entries_type;
4202  typedef typename local_graph_type::row_map_type row_map_type;
4203  typedef typename row_map_type::non_const_type non_const_row_map_type;
4204  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
4205  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
4206  "expertStaticFillComplete): ";
4207  const bool debug = ::Tpetra::Details::Behavior::debug ();
4208  const size_t lclNumRows = this->getNodeNumRows ();
4209 
4210  // This method's goal is to fill in the two arrays (compressed
4211  // sparse row format) that define the sparse graph's structure.
4212  //
4213  // Use the nonconst version of row_map_type for ptr_d, because
4214  // the latter is const and we need to modify ptr_d here.
4215  non_const_row_map_type ptr_d;
4216  row_map_type ptr_d_const;
4217  lclinds_1d_type ind_d;
4218 
4219  bool requestOptimizedStorage = true;
4220  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
4221  requestOptimizedStorage = false;
4222  }
4223  if (this->getProfileType () != StaticProfile) {
4224  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
4225  //
4226  // DynamicProfile means that the graph's column indices are
4227  // currently stored in a 2-D "unpacked" format, in the
4228  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
4229  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
4230  // storage (ind_d).
4231  if (debug) {
4232  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4233  (static_cast<size_t> (this->k_numRowEntries_.extent (0)) !=
4234  lclNumRows, std::logic_error, "(DynamicProfile branch) "
4235  "k_numRowEntries_.extent(0) = " << k_numRowEntries_.extent (0)
4236  << " != getNodeNumRows() = " << lclNumRows << "");
4237  }
4238 
4239  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4240  // array of valid entry counts per row (k_numRowEntries_). The
4241  // pack method can handle its counts input being a host View.
4242  //
4243  // Total number of entries in the matrix on the calling
4244  // process. We will compute this in the loop below. It's
4245  // cheap to compute and useful as a sanity check.
4246  size_t lclTotalNumEntries = 0;
4247  {
4248  // Allocate the packed row offsets array.
4249  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
4250  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4251  // This function can handle that numRowEnt_h lives on host.
4252  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4253  ptr_d_const = ptr_d;
4254  }
4255 
4256  if (debug) {
4257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4258  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4259  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
4260  "ptr_d.extent(0) = " << ptr_d.extent (0) << " != "
4261  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
4262  {
4263  const auto valToCheck =
4264  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4266  (valToCheck != lclTotalNumEntries, std::logic_error,
4267  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
4268  << lclNumRows << ") = " << valToCheck << " != total number of "
4269  "entries on the calling process = " << lclTotalNumEntries << ".");
4270  }
4271  }
4272 
4273  // Allocate the array of packed column indices.
4274  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4275  // Pack the column indices. We have to do this sequentially on
4276  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
4277  // doesn't work in parallel kernels (its iterators aren't even
4278  // thread safe in debug mode).
4279  {
4280  auto ptr_h = Kokkos::create_mirror_view (ptr_d);
4281  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
4282  auto ind_h = Kokkos::create_mirror_view (ind_d); // will fill on host
4283 
4284  // k_numRowEntries_ is a host View already, so we can use it here.
4285  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4286  for (size_t row = 0; row < lclNumRows; ++row) {
4287  const size_t numEnt = numRowEnt_h(row);
4288  std::copy (lclInds2D_[row].begin (),
4289  lclInds2D_[row].begin () + numEnt,
4290  ind_h.data () + ptr_h(row));
4291  }
4292  Kokkos::deep_copy (ind_d, ind_h);
4293  }
4294 
4295  if (debug) {
4296  // Sanity check of packed row offsets.
4297  if (ptr_d.extent (0) != 0) {
4298  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4299  const size_t valToCheck =
4300  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4301  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4302  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4303  std::logic_error, "(DynamicProfile branch) After packing column "
4304  "indices, ptr_d(" << (numOffsets-1) << ") = " << valToCheck
4305  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4306  }
4307  }
4308  }
4309  else if (getProfileType () == StaticProfile) {
4310  // StaticProfile means that the graph's column indices are
4311  // currently stored in a 1-D format, with row offsets in
4312  // k_rowPtrs_ and local column indices in k_lclInds1D_.
4313 
4314  if (debug) {
4315  // StaticProfile also means that the graph's array of row
4316  // offsets must already be allocated.
4317  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4318  (k_rowPtrs_.extent (0) == 0, std::logic_error,
4319  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
4320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4321  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
4322  "(StaticProfile branch) k_rowPtrs_.extent(0) = "
4323  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
4324  << (lclNumRows + 1) << ".");
4325  {
4326  const size_t numOffsets = k_rowPtrs_.extent (0);
4327  const auto valToCheck =
4328  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4329  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4330  (numOffsets != 0 &&
4331  k_lclInds1D_.extent (0) != valToCheck,
4332  std::logic_error, "(StaticProfile branch) numOffsets = " <<
4333  numOffsets << " != 0 and k_lclInds1D_.extent(0) = " <<
4334  k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets <<
4335  ") = " << valToCheck << ".");
4336  }
4337  }
4338 
4339  size_t allocSize = 0;
4340  try {
4341  allocSize = this->getNodeAllocationSize ();
4342  }
4343  catch (std::logic_error& e) {
4344  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4345  (true, std::logic_error, "getNodeAllocationSize threw "
4346  "std::logic_error: " << e.what ());
4347  }
4348  catch (std::runtime_error& e) {
4349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4350  (true, std::runtime_error, "getNodeAllocationSize threw "
4351  "std::runtime_error: " << e.what ());
4352  }
4353  catch (std::exception& e) {
4354  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4355  (true, std::runtime_error, "getNodeAllocationSize threw "
4356  "std::exception: " << e.what ());
4357  }
4358  catch (...) {
4359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4360  (true, std::runtime_error, "getNodeAllocationSize threw "
4361  "an exception not a subclass of std::exception.");
4362  }
4363 
4364  if (this->getNodeNumEntries () != allocSize) {
4365  // The graph's current 1-D storage is "unpacked." This means
4366  // the row offsets may differ from what the final row offsets
4367  // should be. This could happen, for example, if the user
4368  // specified StaticProfile in the constructor and set an upper
4369  // bound on the number of entries in each row, but didn't fill
4370  // all those entries.
4371 
4372  if (debug) {
4373  if (k_rowPtrs_.extent (0) != 0) {
4374  const size_t numOffsets =
4375  static_cast<size_t> (k_rowPtrs_.extent (0));
4376  const auto valToCheck =
4377  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4378  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4379  (valToCheck != static_cast<size_t> (k_lclInds1D_.extent (0)),
4380  std::logic_error, "(StaticProfile unpacked branch) Before "
4381  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
4382  << valToCheck << " != k_lclInds1D_.extent(0) = "
4383  << k_lclInds1D_.extent (0) << ".");
4384  }
4385  }
4386 
4387  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4388  // array of valid entry counts per row (k_numRowEntries_).
4389 
4390  // Total number of entries in the matrix on the calling
4391  // process. We will compute this in the loop below. It's
4392  // cheap to compute and useful as a sanity check.
4393  size_t lclTotalNumEntries = 0;
4394  {
4395  // Allocate the packed row offsets array.
4396  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4397  ptr_d_const = ptr_d;
4398 
4399  // It's ok that k_numRowEntries_ is a host View; the
4400  // function can handle this.
4401  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4402  if (debug) {
4403  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4404  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
4405  std::logic_error, "(StaticProfile unpacked branch) "
4406  "numRowEnt_h.extent(0) = " << numRowEnt_h.extent (0)
4407  << " != getNodeNumRows() = " << lclNumRows << "");
4408  }
4409 
4410  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4411 
4412  if (debug) {
4413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4414  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4415  std::logic_error, "(StaticProfile unpacked branch) After "
4416  "allocating ptr_d, ptr_d.extent(0) = " << ptr_d.extent (0)
4417  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4418  {
4419  const auto valToCheck =
4420  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4422  (valToCheck != lclTotalNumEntries, std::logic_error,
4423  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4424  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
4425  << ") = " << valToCheck << " != total number of entries on "
4426  "the calling process = " << lclTotalNumEntries << ".");
4427  }
4428  }
4429  }
4430 
4431  // Allocate the array of packed column indices.
4432  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4433 
4434  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4435  // them, using the packed row offsets array ptr_d that we
4436  // created above.
4437  //
4438  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4439  // CrsMatrix?), we need to keep around the unpacked row
4440  // offsets and column indices.
4441 
4442  // Pack the column indices from unpacked k_lclInds1D_ into
4443  // packed ind_d. We will replace k_lclInds1D_ below.
4444  typedef pack_functor<
4445  typename local_graph_type::entries_type::non_const_type,
4446  row_map_type> inds_packer_type;
4447  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
4448  {
4449  typedef typename decltype (ind_d)::execution_space exec_space;
4450  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
4451  Kokkos::parallel_for (range_type (0, lclNumRows), f);
4452  }
4453 
4454  if (debug) {
4455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4456  (ptr_d.extent (0) == 0, std::logic_error, "(StaticProfile "
4457  "\"Optimize Storage\"=true branch) After packing, "
4458  "ptr_d.extent(0) = 0. This probably means k_rowPtrs_ was "
4459  "never allocated.");
4460  if (ptr_d.extent (0) != 0) {
4461  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4462  const auto valToCheck =
4463  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4464  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4465  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4466  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
4467  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = "
4468  << valToCheck << " != ind_d.extent(0) = "
4469  << ind_d.extent (0) << ".");
4470  }
4471  }
4472  }
4473  else { // We don't have to pack, so just set the pointers.
4474  ptr_d_const = k_rowPtrs_;
4475  ind_d = k_lclInds1D_;
4476 
4477  if (debug) {
4478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4479  (ptr_d_const.extent (0) == 0, std::logic_error, "(StaticProfile "
4480  "\"Optimize Storage\"=false branch) ptr_d_const.extent(0) = 0. "
4481  "This probably means that k_rowPtrs_ was never allocated.");
4482  if (ptr_d_const.extent (0) != 0) {
4483  const size_t numOffsets =
4484  static_cast<size_t> (ptr_d_const.extent (0));
4485  const size_t valToCheck =
4486  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4487  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4488  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4489  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
4490  "branch) ptr_d_const(" << (numOffsets-1) << ") = " << valToCheck
4491  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4492  }
4493  }
4494  }
4495  }
4496 
4497  if (debug) {
4498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4499  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
4500  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
4501  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4502  << ".");
4503  if (ptr_d_const.extent (0) != 0) {
4504  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
4505  const auto valToCheck =
4506  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4508  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4509  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
4510  << ") = " << valToCheck << " != ind_d.extent(0) = "
4511  << ind_d.extent (0) << ".");
4512  }
4513  }
4514 
4515  if (requestOptimizedStorage) {
4516  // With optimized storage, we don't need to store the 2-D column
4517  // indices array-of-arrays, or the array of row entry counts.
4518 
4519  // Free graph data structures that are only needed for 2-D or
4520  // unpacked 1-D storage.
4521  lclInds2D_ = Teuchos::null;
4522  k_numRowEntries_ = row_entries_type ();
4523 
4524  // Keep the new 1-D packed allocations.
4525  k_rowPtrs_ = ptr_d_const;
4526  k_lclInds1D_ = ind_d;
4527 
4528  // The graph is definitely StaticProfile now, whether or not it
4529  // was before.
4530  pftype_ = StaticProfile;
4531  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
4532  }
4533 
4534  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4535 
4536  // Build the local graph.
4537  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4538  }
4539 
4540  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4541  void
4543  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4544  {
4545  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4546  //
4547  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4548  // has no entries, because in that case, currently it is neither
4549  // locally nor globally indexed. This will change once we get rid
4550  // of lazy allocation (so that the constructor allocates indices
4551  // and therefore commits to local vs. global).
4552  const char tfecfFuncName[] = "replaceColMap: ";
4553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4554  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4555  "Requires matching maps and non-static graph.");
4556  colMap_ = newColMap;
4557  }
4558 
4559  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4560  void
4562  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4563  const Teuchos::RCP<const import_type>& newImport,
4564  const bool sortIndicesInEachRow)
4565  {
4566  using Teuchos::REDUCE_MIN;
4567  using Teuchos::reduceAll;
4568  using Teuchos::RCP;
4569  typedef GlobalOrdinal GO;
4570  typedef LocalOrdinal LO;
4571  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4572  const char tfecfFuncName[] = "reindexColumns: ";
4573 
4574  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4575  isFillComplete (), std::runtime_error, "The graph is fill complete "
4576  "(isFillComplete() returns true). You must call resumeFill() before "
4577  "you may call this method.");
4578 
4579  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4580  // doesn't claim to do the work of an Import or Export. This
4581  // means that for all processes, the calling process MUST own all
4582  // column indices, in both the old column Map (if it exists) and
4583  // the new column Map. We check this via an all-reduce.
4584  //
4585  // Some processes may be globally indexed, others may be locally
4586  // indexed, and others (that have no graph entries) may be
4587  // neither. This method will NOT change the graph's current
4588  // state. If it's locally indexed, it will stay that way, and
4589  // vice versa. It would easy to add an option to convert indices
4590  // from global to local, so as to save a global-to-local
4591  // conversion pass. However, we don't do this here. The intended
4592  // typical use case is that the graph already has a column Map and
4593  // is locally indexed, and this is the case for which we optimize.
4594 
4595  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4596 
4597  // Attempt to convert indices to the new column Map's version of
4598  // local. This will fail if on the calling process, the graph has
4599  // indices that are not on that process in the new column Map.
4600  // After the local conversion attempt, we will do an all-reduce to
4601  // see if any processes failed.
4602 
4603  // If this is false, then either the graph contains a column index
4604  // which is invalid in the CURRENT column Map, or the graph is
4605  // locally indexed but currently has no column Map. In either
4606  // case, there is no way to convert the current local indices into
4607  // global indices, so that we can convert them into the new column
4608  // Map's local indices. It's possible for this to be true on some
4609  // processes but not others, due to replaceColMap.
4610  bool allCurColIndsValid = true;
4611  // On the calling process, are all valid current column indices
4612  // also in the new column Map on the calling process? In other
4613  // words, does local reindexing suffice, or should the user have
4614  // done an Import or Export instead?
4615  bool localSuffices = true;
4616 
4617  // Final arrays for the local indices. We will allocate exactly
4618  // one of these ONLY if the graph is locally indexed on the
4619  // calling process, and ONLY if the graph has one or more entries
4620  // (is not empty) on the calling process. In that case, we
4621  // allocate the first (1-D storage) if the graph has a static
4622  // profile, else we allocate the second (2-D storage).
4623  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4624  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4625 
4626  // If indices aren't allocated, that means the calling process
4627  // owns no entries in the graph. Thus, there is nothing to
4628  // convert, and it trivially succeeds locally.
4629  if (indicesAreAllocated ()) {
4630  if (isLocallyIndexed ()) {
4631  if (hasColMap ()) { // locally indexed, and currently has a column Map
4632  const map_type& oldColMap = * (getColMap ());
4633  if (pftype_ == StaticProfile) {
4634  // Allocate storage for the new local indices.
4635  const size_t allocSize = this->getNodeAllocationSize ();
4636  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4637  // Attempt to convert the new indices locally.
4638  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4639  const RowInfo rowInfo = this->getRowInfo (lclRow);
4640  const size_t beg = rowInfo.offset1D;
4641  const size_t end = beg + rowInfo.numEntries;
4642  for (size_t k = beg; k < end; ++k) {
4643  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4644  // use a DualView instead.
4645  const LO oldLclCol = k_lclInds1D_(k);
4646  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4647  allCurColIndsValid = false;
4648  break; // Stop at the first invalid index
4649  }
4650  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4651 
4652  // The above conversion MUST succeed. Otherwise, the
4653  // current local index is invalid, which means that
4654  // the graph was constructed incorrectly.
4655  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4656  allCurColIndsValid = false;
4657  break; // Stop at the first invalid index
4658  }
4659  else {
4660  const LO newLclCol = newColMap->getLocalElement (gblCol);
4661  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4662  localSuffices = false;
4663  break; // Stop at the first invalid index
4664  }
4665  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4666  // use a DualView instead.
4667  newLclInds1D(k) = newLclCol;
4668  }
4669  } // for each entry in the current row
4670  } // for each locally owned row
4671  }
4672  else { // pftype_ == DynamicProfile
4673  // Allocate storage for the new local indices. We only
4674  // allocate the outer array here; we will allocate the
4675  // inner arrays below.
4676  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4677 
4678  // Attempt to convert the new indices locally.
4679  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4680  const RowInfo rowInfo = this->getRowInfo (lclRow);
4681  newLclInds2D.resize (rowInfo.allocSize);
4682 
4683  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4684  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4685 
4686  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4687  const LO oldLclCol = oldLclRowView[k];
4688  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4689  allCurColIndsValid = false;
4690  break; // Stop at the first invalid index
4691  }
4692  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4693 
4694  // The above conversion MUST succeed. Otherwise, the
4695  // local index is invalid and the graph is wrong.
4696  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4697  allCurColIndsValid = false;
4698  break; // Stop at the first invalid index
4699  }
4700  else {
4701  const LO newLclCol = newColMap->getLocalElement (gblCol);
4702  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4703  localSuffices = false;
4704  break; // Stop at the first invalid index.
4705  }
4706  newLclRowView[k] = newLclCol;
4707  }
4708  } // for each entry in the current row
4709  } // for each locally owned row
4710  } // pftype_
4711  }
4712  else { // locally indexed, but no column Map
4713  // This case is only possible if replaceColMap() was called
4714  // with a null argument on the calling process. It's
4715  // possible, but it means that this method can't possibly
4716  // succeed, since we have no way of knowing how to convert
4717  // the current local indices to global indices.
4718  allCurColIndsValid = false;
4719  }
4720  }
4721  else { // globally indexed
4722  // If the graph is globally indexed, we don't need to save
4723  // local indices, but we _do_ need to know whether the current
4724  // global indices are valid in the new column Map. We may
4725  // need to do a getRemoteIndexList call to find this out.
4726  //
4727  // In this case, it doesn't matter whether the graph currently
4728  // has a column Map. We don't need the old column Map to
4729  // convert from global indices to the _new_ column Map's local
4730  // indices. Furthermore, we can use the same code, whether
4731  // the graph is static or dynamic profile.
4732 
4733  // Test whether the current global indices are in the new
4734  // column Map on the calling process.
4735  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4736  const RowInfo rowInfo = this->getRowInfo (lclRow);
4737  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4738  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4739  const GO gblCol = oldGblRowView[k];
4740  if (! newColMap->isNodeGlobalElement (gblCol)) {
4741  localSuffices = false;
4742  break; // Stop at the first invalid index
4743  }
4744  } // for each entry in the current row
4745  } // for each locally owned row
4746  } // locally or globally indexed
4747  } // whether indices are allocated
4748 
4749  // Do an all-reduce to check both possible error conditions.
4750  int lclSuccess[2];
4751  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4752  lclSuccess[1] = localSuffices ? 1 : 0;
4753  int gblSuccess[2];
4754  gblSuccess[0] = 0;
4755  gblSuccess[1] = 0;
4756  RCP<const Teuchos::Comm<int> > comm =
4757  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4758  if (! comm.is_null ()) {
4759  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4760  }
4761 
4762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4763  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4764  " The most likely reason is that the graph is locally indexed, but the "
4765  "column Map is missing (null) on some processes, due to a previous call "
4766  "to replaceColMap().");
4767 
4768  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4769  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4770  "contains column indices that are in the old column Map, but not in the "
4771  "new column Map (on that process). This method does NOT redistribute "
4772  "data; it does not claim to do the work of an Import or Export operation."
4773  " This means that for all processess, the calling process MUST own all "
4774  "column indices, in both the old column Map and the new column Map. In "
4775  "this case, you will need to do an Import or Export operation to "
4776  "redistribute data.");
4777 
4778  // Commit the results.
4779  if (isLocallyIndexed ()) {
4780  if (pftype_ == StaticProfile) {
4781  k_lclInds1D_ = newLclInds1D;
4782  } else { // dynamic profile
4783  lclInds2D_ = newLclInds2D;
4784  }
4785  // We've reindexed, so we don't know if the indices are sorted.
4786  //
4787  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4788  // since we're already going through all the indices above. We
4789  // could also sort each row in place; that way, we would only
4790  // have to make one pass over the rows.
4791  indicesAreSorted_ = false;
4792  if (sortIndicesInEachRow) {
4793  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4794  // order to call this method.
4795  //
4796  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4797  // guarantee. It would be better to sort the new index arrays
4798  // before committing them.
4799  const bool sorted = false; // need to resort
4800  const bool merged = true; // no need to merge, since no dups
4801  this->sortAndMergeAllIndices (sorted, merged);
4802  }
4803  }
4804  colMap_ = newColMap;
4805 
4806  if (newImport.is_null ()) {
4807  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4808  // check whether the input Import is null on any process.
4809  //
4810  // If the domain Map hasn't been set yet, we can't compute a new
4811  // Import object. Leave it what it is; it should be null, but
4812  // it doesn't matter. If the domain Map _has_ been set, then
4813  // compute a new Import object if necessary.
4814  if (! domainMap_.is_null ()) {
4815  if (! domainMap_->isSameAs (* newColMap)) {
4816  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4817  } else {
4818  importer_ = Teuchos::null; // don't need an Import
4819  }
4820  }
4821  } else {
4822  // The caller gave us an Import object. Assume that it's valid.
4823  importer_ = newImport;
4824  }
4825  }
4826 
4827 
4828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4829  void
4831  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4832  const Teuchos::RCP<const import_type>& newImporter)
4833  {
4834  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4835  TEUCHOS_TEST_FOR_EXCEPTION(
4836  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4837  "this method unless the graph already has a column Map.");
4838  TEUCHOS_TEST_FOR_EXCEPTION(
4839  newDomainMap.is_null (), std::invalid_argument,
4840  prefix << "The new domain Map must be nonnull.");
4841 
4842  const bool debug = ::Tpetra::Details::Behavior::debug ();
4843  if (debug) {
4844  if (newImporter.is_null ()) {
4845  // It's not a good idea to put expensive operations in a macro
4846  // clause, even if they are side effect - free, because macros
4847  // don't promise that they won't evaluate their arguments more
4848  // than once. It's polite for them to do so, but not required.
4849  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4850  TEUCHOS_TEST_FOR_EXCEPTION
4851  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4852  "then the new domain Map must be the same as the current column Map.");
4853  }
4854  else {
4855  const bool colSameAsTgt =
4856  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4857  const bool newDomSameAsSrc =
4858  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4859  TEUCHOS_TEST_FOR_EXCEPTION
4860  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4861  "new Import is nonnull, then the current column Map must be the same "
4862  "as the new Import's target Map, and the new domain Map must be the "
4863  "same as the new Import's source Map.");
4864  }
4865  }
4866 
4867  domainMap_ = newDomainMap;
4868  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4869  }
4870 
// Accessor: hands the lclGraph_ member back to the caller.
4871  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4875  {
4876  return lclGraph_;
4877  }
4878 
4879  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4880  void
4882  computeGlobalConstants (const bool computeLocalTriangularConstants)
4883  {
4884  using ::Tpetra::Details::ProfilingRegion;
4885  using Teuchos::ArrayView;
4886  using Teuchos::outArg;
4887  using Teuchos::reduceAll;
4888  typedef global_size_t GST;
4889 
4890  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4891 
4892  this->computeLocalConstants (computeLocalTriangularConstants);
4893 
4894  // Compute global constants from local constants. Processes that
4895  // already have local constants still participate in the
4896  // all-reduces, using their previously computed values.
4897  if (! this->haveGlobalConstants_) {
4898  const Teuchos::Comm<int>& comm = * (this->getComm ());
4899  // Promote all the nodeNum* and nodeMaxNum* quantities from
4900  // size_t to global_size_t, when doing the all-reduces for
4901  // globalNum* / globalMaxNum* results.
4902  //
4903  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4904  // this in two all-reduces (one for the sum and the other for
4905  // the max), or use a custom MPI_Op that combines the sum and
4906  // the max. The latter might even be slower than two
4907  // all-reduces on modern network hardware. It would also be a
4908  // good idea to use nonblocking all-reduces (MPI 3), so that we
4909  // don't have to wait around for the first one to finish before
4910  // starting the second one.
4911  GST lcl[2], gbl[2];
4912  lcl[0] = static_cast<GST> (this->getNodeNumEntries ());
4913 
4914  // mfh 03 May 2018: nodeNumDiags_ is invalid if
4915  // computeLocalTriangularConstants is false, but there's no
4916  // practical network latency difference between an all-reduce of
4917  // length 1 and an all-reduce of length 2, so it's not worth
4918  // distinguishing between the two. However, we do want to avoid
4919  // integer overflow, so we'll just set the input local sum to
4920  // zero in that case.
4921  lcl[1] = computeLocalTriangularConstants ?
4922  static_cast<GST> (this->nodeNumDiags_) :
4923  static_cast<GST> (0);
4924 
4925  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 2, lcl, gbl);
4926  this->globalNumEntries_ = gbl[0];
4927 
4928  // mfh 03 May 2018: If not computing local triangular
4929  // properties, users want this to be invalid, not just zero.
4930  // This will help with debugging.
4931  this->globalNumDiags_ = computeLocalTriangularConstants ?
4932  gbl[1] :
4933  Teuchos::OrdinalTraits<GST>::invalid ();
4934 
4935  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4936  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4937  outArg (this->globalMaxNumRowEntries_));
4938  this->haveGlobalConstants_ = true;
4939  }
4940  }
4941 
4942 
// Compute and cache this process's local graph constants.
//
// computeLocalTriangularConstants: if true, also determine the local
// triangular structure flags and the local diagonal count; if false,
// only the local maximum number of entries per row is computed, and the
// triangular flags / diagonal count keep their "reset" values.
//
// Does nothing if haveLocalConstants_ is already set.
4943  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4944  void
4946  computeLocalConstants (const bool computeLocalTriangularConstants)
4947  {
4949  using ::Tpetra::Details::ProfilingRegion;
4950 
4951  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4952  if (this->haveLocalConstants_) {
4953  return;
4954  }
4955 
4956  // Reset local properties
4957  this->lowerTriangular_ = false;
4958  this->upperTriangular_ = false;
4959  this->nodeMaxNumRowEntries_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4960  this->nodeNumDiags_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4961 
4962  if (computeLocalTriangularConstants) {
4963  const bool hasRowAndColumnMaps =
4964  this->rowMap_.get () != nullptr && this->colMap_.get () != nullptr;
4965  if (hasRowAndColumnMaps) {
4966  auto lclRowMap = this->rowMap_->getLocalMap ();
4967  auto lclColMap = this->colMap_->getLocalMap ();
4968 
4969  // Make sure that the GPU can see any updates made on host.
4970  // This code only reads the local graph, so we don't need a
4971  // fence afterwards.
4972  execution_space().fence ();
4973 
4974  // mfh 01 May 2018: See GitHub Issue #2658.
4975  constexpr bool ignoreMapsForTriStruct = true;
4976  auto result =
4977  determineLocalTriangularStructure (this->lclGraph_, lclRowMap,
4978  lclColMap, ignoreMapsForTriStruct);
// Copy all four results into the cached members.
4979  this->lowerTriangular_ = result.couldBeLowerTriangular;
4980  this->upperTriangular_ = result.couldBeUpperTriangular;
4981  this->nodeMaxNumRowEntries_ = result.maxNumRowEnt;
4982  this->nodeNumDiags_ = result.diagCount;
4983  }
4984  else {
// Without both a row Map and a column Map, report zero for both counts
// (the triangular flags stay false from the reset above).
4985  this->nodeMaxNumRowEntries_ = 0;
4986  this->nodeNumDiags_ = 0;
4987  }
4988  }
4989  else {
4990  using LO = local_ordinal_type;
4991  // Make sure that the GPU can see any updates made on host.
4992  // This code only reads the local graph, so we don't need a
4993  // fence afterwards.
4994  execution_space().fence ();
4995 
// The local row count is one less than the length of the row offsets
// array, or zero if that array is empty.
4996  auto ptr = this->lclGraph_.row_map;
4997  const LO lclNumRows = ptr.extent(0) == 0 ?
4998  static_cast<LO> (0) :
4999  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
5000 
// NOTE(review): maxDifference presumably returns the largest difference
// between consecutive row offsets, i.e. the longest row -- confirm
// against its declaration.
5001  const LO lclMaxNumRowEnt =
5002  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
5003  ptr, lclNumRows);
5004  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
5005  }
5006  this->haveLocalConstants_ = true;
5007  }
5008 
5009 
// Convert the graph's column indices from global to local, using the
// graph's column Map.
//
// Returns a pair:
//   first:  the number of column indices (counting duplicates) on the
//           calling process that could not be converted, because they
//           are not in the column Map; or an invalid size_t value on
//           the early-return path below.
//   second: a human-readable error string (empty if nothing was
//           reported).
//
// The exception checks at the top fire if the graph has no column Map.
// The conversion itself only runs if the graph is globally indexed and
// has at least one local row.  In all non-early-return cases the method
// rebuilds lclGraph_ from k_lclInds1D_ and k_rowPtrs_, marks the indices
// as local, and calls checkInternalState().
5010  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5011  std::pair<size_t, std::string>
5014  {
5015  using ::Tpetra::Details::ProfilingRegion;
5016  using Teuchos::arcp;
5017  using Teuchos::Array;
5018  using std::endl;
5019  typedef LocalOrdinal LO;
5020  typedef GlobalOrdinal GO;
5021  typedef device_type DT;
5022  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
5023  typedef decltype (k_numRowEntries_) row_entries_type;
5024  typedef typename row_entries_type::non_const_value_type num_ent_type;
5025  typedef typename local_graph_type::entries_type::non_const_type
5026  lcl_col_inds_type;
5027  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
5028  device_type> gbl_col_inds_type;
5029  const char tfecfFuncName[] = "makeIndicesLocal: ";
5030  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
5031 
5032  // These are somewhat global properties, so it's safe to have
5033  // exception checks for them, rather than returning an error code.
5034  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5035  (! this->hasColMap (), std::logic_error, "The graph does not have a "
5036  "column Map yet. This method should never be called in that case. "
5037  "Please report this bug to the Tpetra developers.");
5038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5039  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
5040  "that it has a column Map, because hasColMap() returns true. However, "
5041  "the result of getColMap() is null. This should never happen. Please "
5042  "report this bug to the Tpetra developers.");
5043 
5044  // Return value 1: The number of column indices (counting
5045  // duplicates) that could not be converted to local indices,
5046  // because they were not in the column Map on the calling process.
5047  size_t lclNumErrs = 0;
5048  std::ostringstream errStrm; // for return value 2 (error string)
5049 
5050  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5051  const map_type& colMap = * (this->getColMap ());
5052 
5053  if (this->isGloballyIndexed () && lclNumRows != 0) {
5054  // This is a host-accessible View.
5055  typename row_entries_type::const_type h_numRowEnt =
5056  this->k_numRowEntries_;
5057 
5058  // Allocate space for local indices.
5059  if (this->getProfileType () == StaticProfile) {
5060  // If GO and LO are the same size, we can reuse the existing
5061  // array of 1-D index storage to convert column indices from
5062  // GO to LO. Otherwise, we'll just allocate a new buffer.
5063  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
5064  if (LO_GO_same) {
5065  // This prevents a build error (illegal assignment) if
5066  // LO_GO_same is _not_ true. Only the first branch
5067  // (returning k_gblInds1D_) should ever get taken.
5068  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
5070  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
5071  }
5072  else {
5073  if (k_rowPtrs_.extent (0) == 0) {
5074  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
5075  "happen here. Please report this bug to the Tpetra developers."
5076  << endl;
5077  // Need to return early.
// Note that this path returns before lclGraph_ and the
// indicesAreLocal_ / indicesAreGlobal_ flags are updated.
5078  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
5079  errStrm.str ());
5080  }
// The last row offset is used as the size of the new local index array.
5081  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
5082 
5083  // mfh 17 Dec 2016: We don't need initial zero-fill of
5084  // k_lclInds1D_, because we will fill it below anyway.
5085  // AllowPadding would only help for aligned access (e.g.,
5086  // for vectorization) if we also were to pad each row to the
5087  // same alignment, so we'll skip AllowPadding for now.
5088 
5089  // using Kokkos::AllowPadding;
5090  using Kokkos::view_alloc;
5091  using Kokkos::WithoutInitializing;
5092 
5093  // When giving the label as an argument to
5094  // Kokkos::view_alloc, the label must be a string and not a
5095  // char*, else the code won't compile. This is because
5096  // view_alloc also allows a raw pointer as its first
5097  // argument. See
5098  // https://github.com/kokkos/kokkos/issues/434. This is a
5099  // large allocation typically, so the overhead of creating
5100  // an std::string is minor.
5101  const std::string label ("Tpetra::CrsGraph::lclind");
5102  k_lclInds1D_ =
5103  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
5104  }
5105 
5106  auto lclColMap = colMap.getLocalMap ();
5107  // This is a "device mirror" of the host View h_numRowEnt.
5108  //
5109  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
5110  // Device instance is to use its default constructor. See the
5111  // following Kokkos issue:
5112  //
5113  // https://github.com/kokkos/kokkos/issues/442
5114  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
5115 
// The call's return value is the count of indices that could not be
// converted.
5117  lclNumErrs =
5118  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
5119  k_gblInds1D_,
5120  k_rowPtrs_,
5121  lclColMap,
5122  k_numRowEnt);
5123  if (lclNumErrs != 0) {
// Fall back to rank 0 in the message if the Map or its communicator is
// null.
5124  const int myRank = [this] () {
5125  auto map = this->getMap ();
5126  if (map.is_null ()) {
5127  return 0;
5128  }
5129  else {
5130  auto comm = map->getComm ();
5131  return comm.is_null () ? 0 : comm->getRank ();
5132  }
5133  } ();
5134  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
// NOTE(review): the verb agreement looks inverted -- with more than one
// error this prints "indices that does", with exactly one it prints
// "index that do".
5135  errStrm << "(Process " << myRank << ") When converting column "
5136  "indices from global to local, we encountered " << lclNumErrs
5137  << " ind" << (pluralNumErrs ? "ices" : "ex")
5138  << " that do" << (pluralNumErrs ? "es" : "")
5139  << " not live in the column Map on this process." << endl;
5140  }
5141 
5142  // We've converted column indices from global to local, so we
5143  // can deallocate the global column indices (which we know are
5144  // in 1-D storage, because the graph has static profile).
5145  k_gblInds1D_ = gbl_col_inds_type ();
5146  }
5147  else { // the graph has dynamic profile (2-D index storage)
5148  // Avoid any drama with *this capture, by extracting the
5149  // variables that the thread-parallel loop will need below.
5150  // This is just a shallow copy.
5151  Teuchos::ArrayRCP<Teuchos::Array<LO> > lclInds2D (lclNumRows);
5152  Teuchos::ArrayRCP<Teuchos::Array<GO> > gblInds2D = this->gblInds2D_;
5153 
5154  // We must use a host thread parallelization here, because
5155  // Teuchos::ArrayRCP does not work in CUDA.
5156  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5157  host_execution_space;
5158  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
// Each iteration converts one row and adds that row's number of
// unconvertible indices to the reduction result (lclNumErrs).
5159  Kokkos::parallel_reduce (
5160  "Tpetra::CrsGraph::makeIndicesLocal (DynamicProfile)",
5161  range_type (0, lclNumRows),
5162  [&gblInds2D, &h_numRowEnt, &lclInds2D, &colMap] (const LO& lclRow, size_t& numErrs) {
5163  const GO* const curGblInds = gblInds2D[lclRow].getRawPtr ();
5164  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
5165  // number of entries in a row to LO, as long as the row
5166  // doesn't have too many duplicate entries.
5167  const LO rna = static_cast<LO> (gblInds2D[lclRow].size ());
5168  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
5169  lclInds2D[lclRow].resize (rna); // purely thread-local, so safe
5170  LO* const curLclInds = lclInds2D[lclRow].getRawPtr ();
// NOTE(review): this loop reads and writes numEnt entries, but both
// arrays hold rna entries.  If numEnt > rna the accesses run past the
// end; the diagnostic pass further down only reports such rows after
// the fact, and only if lclNumErrs != 0.
5171  for (LO j = 0; j < numEnt; ++j) {
5172  const GO gid = curGblInds[j];
5173  const LO lid = colMap.getLocalElement (gid);
5174  curLclInds[j] = lid;
5175  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5176  ++numErrs;
5177  }
5178  }
5179  }, lclNumErrs);
5180 
5181  this->lclInds2D_ = lclInds2D; // "commit" the result
5182 
5183  // If we detected an error in the above loop, go back and find
5184  // the global column indices not in the column Map on the
5185  // calling process.
5186  if (lclNumErrs != 0) {
5187  const int myRank = [this] () {
5188  auto map = this->getMap ();
5189  if (map.is_null ()) {
5190  return 0;
5191  }
5192  else {
5193  auto comm = map->getComm ();
5194  return comm.is_null () ? 0 : comm->getRank ();
5195  }
5196  } ();
5197 
5198  // If there are too many errors, don't bother printing them.
5199  size_t tooManyErrsToPrint = ::Tpetra::Details::Behavior::verbosePrintCountThreshold();
5200  if (lclNumErrs > tooManyErrsToPrint) {
5201  errStrm << "(Process " << myRank << ") When converting column "
5202  "indices from global to local, we encountered " << lclNumErrs
5203  << " indices that do not live in the column Map on this "
5204  "process. That's exceeds the allowable number to print."
5205  << "This limit is controllable by TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD." << endl;
5206  }
5207  else {
5208  // Map from local row index, to any global column indices
5209  // that do not live in the column Map on the calling process.
5210  std::map<LO, std::vector<GO> > badColInds;
5211  // List of local rows lclRow for which h_numRowEnt[lclRow]
5212  // > gblInds2D_[lclRow].size().
5213  std::vector<LO> badLclRows;
5214 
// Serial second pass over all rows, this time recording which indices
// (and which rows) were bad, not just how many.
5215  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
5216  const size_t numEnt = static_cast<size_t> (h_numRowEnt[lclRow]);
5217 
5218  Teuchos::ArrayView<const GO> curGblInds = gblInds2D_[lclRow] ();
5219  if (numEnt > static_cast<size_t> (curGblInds.size ())) {
5220  badLclRows.push_back (lclRow);
5221  }
5222  else {
5223  for (size_t j = 0; j < numEnt; ++j) {
5224  const GO gid = curGblInds[j];
5225  const LO lid = colMap.getLocalElement (gid);
5226  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5227  badColInds[lclRow].push_back (gid);
5228  }
5229  }
5230  }
5231  }
5232 
5233  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
// NOTE(review): same inverted verb agreement as in the static-profile
// branch ("indices that does" / "index that do").
5234  errStrm << "(Process " << myRank << ") When converting column "
5235  "indices from global to local, we encountered " << lclNumErrs
5236  << " ind" << (pluralNumErrs ? "ices" : "ex") << " that "
5237  "do" << (pluralNumErrs ? "es" : "")
5238  << " not live in the column Map on this process." << endl
5239  << "(Process " << myRank << ") Here are the bad global "
5240  "indices, listed by local row: " << endl;
5241  for (auto && eachPair : badColInds) {
5242  const LO lclRow = eachPair.first;
5243  const GO gblRow = rowMap_->getGlobalElement (lclRow);
5244  errStrm << "(Process " << myRank << ") Local row " << lclRow
5245  << " (global row " << gblRow << "): [";
5246  const size_t numBad = eachPair.second.size ();
5247  for (size_t k = 0; k < numBad; ++k) {
5248  errStrm << eachPair.second[k];
5249  if (k + size_t (1) < numBad) {
5250  errStrm << ",";
5251  }
5252  }
5253  errStrm << "]" << endl;
5254  }
5255 
5256  if (badLclRows.size () != 0) {
// NOTE(review): this code sits inside `if (lclNumErrs != 0)` and nothing
// in between modifies lclNumErrs, so the condition below appears to be
// always false here.
5257  if (lclNumErrs == 0) {
5258  // We really want lclNumErrs to be just the count of
5259  // bad column indices, but lclNumErrs != 0 also
5260  // doubles as a generic indication of error.
5261  lclNumErrs = badLclRows.size ();
5262  }
5263 
5264  errStrm << "(Process " << myRank << ") When converting column "
5265  "indices from global to local, we (also) encountered the "
5266  "following local rows lclRow on this process for which "
5267  "h_numRowEnt[lclRow] > gblInds2D_[lclRow].size(). This "
5268  "likely indicates a bug in Tpetra." << endl
5269  << "(Process " << myRank << ") [";
5270  const size_t numBad = badLclRows.size ();
5271  for (size_t k = 0; k < numBad; ++k) {
5272  const LO lclRow = badLclRows[k];
// NOTE(review): the three fields are printed with no separator between
// them.
5273  errStrm << "{lclRow: " << lclRow
5274  << "h_numRowEnt[lclRow]: " << h_numRowEnt[lclRow]
5275  << "gblInds2D_[lclRow].size(): "
5276  << gblInds2D_[lclRow].size () << "}";
5277  if (k + size_t (1) < numBad) {
5278  errStrm << ", ";
5279  }
5280  }
5281  errStrm << "]" << endl;
5282  }
5283  }
5284  }
5285 
// The global 2-D index storage is released after the conversion and the
// diagnostics that still read it.
5286  this->gblInds2D_ = Teuchos::null;
5287  }
5288  } // globallyIndexed() && lclNumRows > 0
5289 
// These final steps run whether or not any conversion was needed above.
5290  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
5291  this->indicesAreLocal_ = true;
5292  this->indicesAreGlobal_ = false;
5293  this->checkInternalState ();
5294 
5295  return std::make_pair (lclNumErrs, errStrm.str ());
5296  }
5297 
5298 
5299  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5300  void
5302  makeColMap (Teuchos::Array<int>& remotePIDs)
5303  {
5304  using ::Tpetra::Details::ProfilingRegion;
5305  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
5306  const bool debug = ::Tpetra::Details::Behavior::debug ();
5307 
5308  // this->colMap_ should be null at this point, but we accept the
5309  // future possibility that it might not be (esp. if we decide
5310  // later to support graph structure changes after first
5311  // fillComplete, which CrsGraph does not currently (as of 12 Feb
5312  // 2017) support).
5313  Teuchos::RCP<const map_type> colMap = this->colMap_;
5314  const bool sortEachProcsGids =
5315  this->sortGhostsAssociatedWithEachProcessor_;
5316 
5317  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
5318  // per-process error code. If an error does occur on a process,
5319  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
5320  // notice that error. This is the caller's responsibility. For
5321  // now, we only propagate (to all processes) and report the error
5322  // in debug mode. In the future, we need to add the local/global
5323  // error handling scheme used in BlockCrsMatrix to this class.
5324  if (debug) {
5325  using Teuchos::outArg;
5326  using Teuchos::REDUCE_MIN;
5327  using Teuchos::reduceAll;
5328  const char tfecfFuncName[] = "makeColMap: ";
5329 
5330  std::ostringstream errStrm;
5331  const int lclErrCode =
5332  ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5333  *this, sortEachProcsGids, &errStrm);
5334  auto comm = this->getComm ();
5335  if (! comm.is_null ()) {
5336  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
5337  int gblSuccess = 0; // output argument
5338  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
5339  outArg (gblSuccess));
5340  if (gblSuccess != 1) {
5341  std::ostringstream os;
5342  Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
5343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5344  (true, std::runtime_error, "An error happened on at least one "
5345  "(MPI) process in the CrsGraph's communicator. Here are all "
5346  "processes' error messages:" << std::endl << os.str ());
5347  }
5348  }
5349  }
5350  else {
5351  (void) ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5352  *this, sortEachProcsGids, nullptr);
5353  }
5354  // See above. We want to admit the possibility of makeColMap
5355  // actually revising an existing column Map, even though that
5356  // doesn't currently (as of 10 May 2017) happen.
5357  this->colMap_ = colMap;
5358 
5359  checkInternalState ();
5360  }
5361 
5362 
5363  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5364  void
5366  sortAndMergeAllIndices (const bool sorted, const bool merged)
5367  {
5368  using ::Tpetra::Details::ProfilingRegion;
5369  typedef LocalOrdinal LO;
5370  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5371  host_execution_space;
5372  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5373  const char tfecfFuncName[] = "sortAndMergeAllIndices: ";
5374  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices");
5375 
5376  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5377  (this->isGloballyIndexed (), std::logic_error,
5378  "This method may only be called after makeIndicesLocal." );
5379 
5380  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5381  (! merged && this->isStorageOptimized (), std::logic_error,
5382  "The graph is already storage optimized, so we shouldn't be merging any "
5383  "indices. Please report this bug to the Tpetra developers.");
5384 
5385  if (! sorted || ! merged) {
5386  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5387  size_t totalNumDups = 0;
5388  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
5389  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5390  [this, sorted, merged] (const LO& lclRow, size_t& numDups) {
5391  const RowInfo rowInfo = this->getRowInfo (lclRow);
5392  numDups += this->sortAndMergeRowIndices (rowInfo, sorted, merged);
5393  }, totalNumDups);
5394  this->indicesAreSorted_ = true; // we just sorted every row
5395  this->noRedundancies_ = true; // we just merged every row
5396  }
5397  }
5398 
5399 
5400  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5401  void
5403  makeImportExport (Teuchos::Array<int>& remotePIDs,
5404  const bool useRemotePIDs)
5405  {
5406  using ::Tpetra::Details::ProfilingRegion;
5407  using Teuchos::ParameterList;
5408  using Teuchos::RCP;
5409  using Teuchos::rcp;
5410  const char tfecfFuncName[] = "makeImportExport: ";
5411  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
5412 
5413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5414  (! this->hasColMap (), std::logic_error,
5415  "This method may not be called unless the graph has a column Map.");
5416  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5417 
5418  // Don't do any checks to see if we need to create the Import, if
5419  // it exists already.
5420  //
5421  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5422  // change CrsGraph in the future to allow changing the column
5423  // Map after fillComplete. For now, the column Map is fixed
5424  // after the first fillComplete call.
5425  if (importer_.is_null ()) {
5426  // Create the Import instance if necessary.
5427  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5428  if (params.is_null () || ! params->isSublist ("Import")) {
5429  if (useRemotePIDs) {
5430  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
5431  }
5432  else {
5433  importer_ = rcp (new import_type (domainMap_, colMap_));
5434  }
5435  }
5436  else {
5437  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5438  if (useRemotePIDs) {
5439  RCP<import_type> newImp =
5440  rcp (new import_type (domainMap_, colMap_, remotePIDs,
5441  importSublist));
5442  importer_ = newImp;
5443  }
5444  else {
5445  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5446  }
5447  }
5448  }
5449  }
5450 
5451  // Don't do any checks to see if we need to create the Export, if
5452  // it exists already.
5453  if (exporter_.is_null ()) {
5454  // Create the Export instance if necessary.
5455  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5456  if (params.is_null () || ! params->isSublist ("Export")) {
5457  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5458  }
5459  else {
5460  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5461  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5462  }
5463  }
5464  }
5465  }
5466 
5467 
// Build a one-line, human-readable summary of the graph and return it as
// a string.  The summary starts with dist_object_type::description(),
// followed by a brace-enclosed status: the fill state and global row
// count always, plus the global column and entry counts when the graph
// is fill complete.
5468  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5469  std::string
5472  {
5473  std::ostringstream oss;
5474  oss << dist_object_type::description ();
5475  if (isFillComplete ()) {
5476  oss << "{status = fill complete"
5477  << ", global rows = " << getGlobalNumRows()
5478  << ", global cols = " << getGlobalNumCols()
5479  << ", global num entries = " << getGlobalNumEntries()
5480  << "}";
5481  }
5482  else {
// Before fill complete, only the global row count is printed.
5483  oss << "{status = fill not complete"
5484  << ", global rows = " << getGlobalNumRows()
5485  << "}";
5486  }
5487  return oss.str();
5488  }
5489 
5490 
5491  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5492  void
5494  describe (Teuchos::FancyOStream &out,
5495  const Teuchos::EVerbosityLevel verbLevel) const
5496  {
5497  using Teuchos::ArrayView;
5498  using Teuchos::Comm;
5499  using Teuchos::RCP;
5500  using Teuchos::VERB_DEFAULT;
5501  using Teuchos::VERB_NONE;
5502  using Teuchos::VERB_LOW;
5503  using Teuchos::VERB_MEDIUM;
5504  using Teuchos::VERB_HIGH;
5505  using Teuchos::VERB_EXTREME;
5506  using std::endl;
5507  using std::setw;
5508 
5509  Teuchos::EVerbosityLevel vl = verbLevel;
5510  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5511  RCP<const Comm<int> > comm = this->getComm();
5512  const int myImageID = comm->getRank(),
5513  numImages = comm->getSize();
5514  size_t width = 1;
5515  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5516  ++width;
5517  }
5518  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5519  Teuchos::OSTab tab (out);
5520  // none: print nothing
5521  // low: print O(1) info from node 0
5522  // medium: print O(P) info, num entries per node
5523  // high: print O(N) info, num entries per row
5524  // extreme: print O(NNZ) info: print graph indices
5525  //
5526  // for medium and higher, print constituent objects at specified verbLevel
5527  if (vl != VERB_NONE) {
5528  if (myImageID == 0) out << this->description() << std::endl;
5529  // O(1) globals, minus what was already printed by description()
5530  if (isFillComplete() && myImageID == 0) {
5531  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5532  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5533  }
5534  // constituent objects
5535  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5536  if (myImageID == 0) out << "\nRow map: " << std::endl;
5537  rowMap_->describe(out,vl);
5538  if (colMap_ != Teuchos::null) {
5539  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5540  colMap_->describe(out,vl);
5541  }
5542  if (domainMap_ != Teuchos::null) {
5543  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5544  domainMap_->describe(out,vl);
5545  }
5546  if (rangeMap_ != Teuchos::null) {
5547  if (myImageID == 0) out << "\nRange map: " << std::endl;
5548  rangeMap_->describe(out,vl);
5549  }
5550  }
5551  // O(P) data
5552  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5553  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5554  if (myImageID == imageCtr) {
5555  out << "Node ID = " << imageCtr << std::endl
5556  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5557  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5558  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5559  if (! indicesAreAllocated ()) {
5560  out << "Indices are not allocated." << std::endl;
5561  }
5562  }
5563  comm->barrier();
5564  comm->barrier();
5565  comm->barrier();
5566  }
5567  }
5568  // O(N) and O(NNZ) data
5569  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5570  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5571  if (myImageID == imageCtr) {
5572  out << std::setw(width) << "Node ID"
5573  << std::setw(width) << "Global Row"
5574  << std::setw(width) << "Num Entries";
5575  if (vl == VERB_EXTREME) {
5576  out << " Entries";
5577  }
5578  out << std::endl;
5579  const LocalOrdinal lclNumRows =
5580  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5581  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5582  const RowInfo rowinfo = this->getRowInfo (r);
5583  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5584  out << std::setw(width) << myImageID
5585  << std::setw(width) << gid
5586  << std::setw(width) << rowinfo.numEntries;
5587  if (vl == VERB_EXTREME) {
5588  out << " ";
5589  if (isGloballyIndexed()) {
5590  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5591  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5592  }
5593  else if (isLocallyIndexed()) {
5594  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5595  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5596  }
5597  }
5598  out << std::endl;
5599  }
5600  }
5601  comm->barrier();
5602  comm->barrier();
5603  comm->barrier();
5604  }
5605  }
5606  }
5607  }
5608 
5609 
5610  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5611  bool
5613  checkSizes (const SrcDistObject& /* source */)
5614  {
5615  // It's not clear what kind of compatibility checks on sizes can
5616  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5617  // compatibility.
5618  return true;
5619  }
5620 
5621  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5622  void
5624 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
5625  copyAndPermuteNew
5626 #else // TPETRA_ENABLE_DEPRECATED_CODE
5627  copyAndPermute
5628 #endif // TPETRA_ENABLE_DEPRECATED_CODE
5629  (const SrcDistObject& source,
5630  const size_t numSameIDs,
5631  const Kokkos::DualView<const local_ordinal_type*,
5632  buffer_device_type>& permuteToLIDs,
5633  const Kokkos::DualView<const local_ordinal_type*,
5634  buffer_device_type>& permuteFromLIDs)
5635  {
5636  using std::endl;
5637  using LO = local_ordinal_type;
5638  using GO = global_ordinal_type;
5639  using this_type = CrsGraph<LO, GO, node_type>;
5640  using row_graph_type = RowGraph<LO, GO, node_type>;
5641  const char tfecfFuncName[] = "copyAndPermute: ";
5642  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
5643 
5644  std::unique_ptr<std::string> prefix;
5645  if (debug) {
5646  std::ostringstream os;
5647  const int myRank = this->getMap ()->getComm ()->getRank ();
5648  os << "Proc " << myRank << ": Tpetra::CrsGraph::copyAndPermute: ";
5649  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
5650  os << endl;
5651  std::cerr << os.str ();
5652  }
5653 
5654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5655  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5656  std::runtime_error, "permuteToLIDs.extent(0) = "
5657  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
5658  << permuteFromLIDs.extent (0) << ".");
5659 
5660  // We know from checkSizes that the source object is a
5661  // row_graph_type, so we don't need to check again.
5662  const row_graph_type& srcRowGraph =
5663  dynamic_cast<const row_graph_type&> (source);
5664 
5665  if (this->getProfileType () == StaticProfile) {
5666  if (debug) {
5667  std::ostringstream os;
5668  os << *prefix << "Target is StaticProfile; do CRS padding" << endl;
5669  std::cerr << os.str ();
5670  }
5671  auto padding =
5672  computeCrsPadding (srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs);
5673  this->applyCrsPadding(padding);
5674  }
5675  else if (debug) {
5676  std::ostringstream os;
5677  os << *prefix << "Target is DynamicProfile" << endl;
5678  std::cerr << os.str ();
5679  }
5680 
5681  // If the source object is actually a CrsGraph, we can use view
5682  // mode instead of copy mode to access the entries in each row,
5683  // if the graph is not fill complete.
5684  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5685 
5686  const map_type& srcRowMap = * (srcRowGraph.getRowMap ());
5687  const map_type& tgtRowMap = * (this->getRowMap ());
5688  const bool src_filled = srcRowGraph.isFillComplete ();
5689  Teuchos::Array<GO> row_copy;
5690  LO myid = 0;
5691 
5692  //
5693  // "Copy" part of "copy and permute."
5694  //
5695  if (src_filled || srcCrsGraph == nullptr) {
5696  if (debug) {
5697  std::ostringstream os;
5698  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5699  std::cerr << os.str ();
5700  }
5701  // If the source graph is fill complete, we can't use view mode,
5702  // because the data might be stored in a different format not
5703  // compatible with the expectations of view mode. Also, if the
5704  // source graph is not a CrsGraph, we can't use view mode,
5705  // because RowGraph only provides copy mode access to the data.
5706  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5707  const GO gid = srcRowMap.getGlobalElement (myid);
5708  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5709  row_copy.resize (row_length);
5710  size_t check_row_length = 0;
5711  srcRowGraph.getGlobalRowCopy (gid, row_copy (), check_row_length);
5712  this->insertGlobalIndices (gid, row_copy ());
5713  }
5714  } else {
5715  if (debug) {
5716  std::ostringstream os;
5717  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5718  std::cerr << os.str ();
5719  }
5720  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5721  const GO gid = srcRowMap.getGlobalElement (myid);
5722  Teuchos::ArrayView<const GO> row;
5723  srcCrsGraph->getGlobalRowView (gid, row);
5724  this->insertGlobalIndices (gid, row);
5725  }
5726  }
5727 
5728  //
5729  // "Permute" part of "copy and permute."
5730  //
5731  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5732  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5733 
5734  if (src_filled || srcCrsGraph == nullptr) {
5735  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5736  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5737  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5738  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5739  row_copy.resize (row_length);
5740  size_t check_row_length = 0;
5741  srcRowGraph.getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5742  this->insertGlobalIndices (mygid, row_copy ());
5743  }
5744  } else {
5745  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5746  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5747  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5748  Teuchos::ArrayView<const GO> row;
5749  srcCrsGraph->getGlobalRowView (srcgid, row);
5750  this->insertGlobalIndices (mygid, row);
5751  }
5752  }
5753 
5754  if (debug) {
5755  std::ostringstream os;
5756  os << *prefix << "Done" << endl;
5757  std::cerr << os.str ();
5758  }
5759  }
5760 
5761  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5762  void
5763  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5764  applyCrsPadding(const Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>& padding)
5765  {
5766  // const char tfecfFuncName[] = "applyCrsPadding";
5767  using execution_space = typename device_type::execution_space;
5768  using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type;
5769  using indices_type = t_GlobalOrdinal_1D;
5770  using local_indices_type = typename local_graph_type::entries_type::non_const_type;
5771  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LocalOrdinal>>;
5773 
5774  if (padding.size() == 0)
5775  return;
5776 
5777  // Assume global indexing we don't have any indices yet
5778  if (! this->indicesAreAllocated()) {
5779  allocateIndices(GlobalIndices);
5780  }
5781 
5782  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5783  // would use it directly.
5784 
5785  row_ptrs_type row_ptrs_beg("row_ptrs_beg", this->k_rowPtrs_.extent(0));
5786  Kokkos::deep_copy(row_ptrs_beg, this->k_rowPtrs_);
5787 
5788  const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1);
5789  row_ptrs_type row_ptrs_end("row_ptrs_end", N);
5790 
5791  bool refill_num_row_entries = false;
5792  if (this->k_numRowEntries_.extent(0) > 0) {
5793  // Case 1: Unpacked storage
5794  refill_num_row_entries = true;
5795  auto num_row_entries = this->k_numRowEntries_;
5796  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5797  KOKKOS_LAMBDA(const size_t i){
5798  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5799  }
5800  );
5801 
5802  } else {
5803  // mfh If packed storage, don't need row_ptrs_end to be separate allocation;
5804  // could just have it alias row_ptrs_beg+1.
5805  // Case 2: Packed storage
5806  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5807  KOKKOS_LAMBDA(const size_t i){
5808  row_ptrs_end(i) = row_ptrs_beg(i+1);
5809  }
5810  );
5811  }
5812 
5813  if(this->isGloballyIndexed()) {
5814  indices_type indices("indices", this->k_gblInds1D_.extent(0));
5815  Kokkos::deep_copy(indices, this->k_gblInds1D_);
5816  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5817  padCrsArrays<row_ptrs_type,indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5818  this->k_gblInds1D_ = indices;
5819  }
5820  else {
5821  local_indices_type indices("indices", this->k_lclInds1D_.extent(0));
5822  Kokkos::deep_copy(indices, this->k_lclInds1D_);
5823  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5824  padCrsArrays<row_ptrs_type,local_indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5825  this->k_lclInds1D_ = indices;
5826  }
5827 
5828  if (refill_num_row_entries) {
5829  auto num_row_entries = this->k_numRowEntries_;
5830  Kokkos::parallel_for("Fill num entries", range_policy(0, N),
5831  KOKKOS_LAMBDA(const size_t i){
5832  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5833  }
5834  );
5835  }
5836  this->k_rowPtrs_ = row_ptrs_beg;
5837  }
5838 
5839  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5840  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5841  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5842  computeCrsPadding (const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5843  const size_t numSameIDs,
5844  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
5845  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs) const
5846  {
5847  using LO = LocalOrdinal;
5848  using padding_type = Kokkos::UnorderedMap<LO, size_t, device_type>;
5849  padding_type padding (numSameIDs + permuteFromLIDs.extent (0));
5850 
5851  computeCrsPaddingForSameIDs(padding, source, numSameIDs, false);
5852  computeCrsPaddingForPermutedIDs(padding, source, permuteToLIDs, permuteFromLIDs, false);
5853 
5854  Kokkos::fence (); // Make sure device sees changes made by host
5855  TEUCHOS_TEST_FOR_EXCEPTION
5856  (padding.failed_insert(), std::runtime_error,
5857  "failed to insert one or more indices in to padding map");
5858 
5859  return padding;
5860  }
5861 
5862  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5863  void
5864  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5865  computeCrsPaddingForSameIDs (Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>& padding,
5866  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5867  const size_t numSameIDs,
5868  const bool padAll) const
5869  {
5870  using LO = LocalOrdinal;
5871  using GO = GlobalOrdinal;
5872  const char tfecfFuncName[] = "computeCrsPaddingForSameIds: ";
5873 
5874  Kokkos::fence ();
5875 
5876  using insert_result =
5877  typename Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>::insert_result;
5878 
5879  // Compute extra capacity needed to accommodate incoming data
5880  const map_type& src_row_map = * (source.getRowMap ());
5881  for (LO tgt_lid = 0; tgt_lid < static_cast<LO> (numSameIDs); ++tgt_lid) {
5882  const GO src_gid = src_row_map.getGlobalElement(tgt_lid);
5883  auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid);
5884 
5885  if (num_src_entries == 0)
5886  continue;
5887 
5888  insert_result result;
5889  const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid);
5890  if (padAll) {
5891  result = padding.insert(tgt_lid, num_src_entries);
5892  }
5893  else {
5894  size_t check_row_length = 0;
5895  std::vector<GO> src_row_inds(num_src_entries);
5896  Teuchos::ArrayView<GO> src_row_inds_view(src_row_inds.data(), src_row_inds.size());
5897  source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length);
5898 
5899  auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid);
5900  std::vector<GO> tgt_row_inds(num_tgt_entries);
5901  Teuchos::ArrayView<GO> tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size());
5902  this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length);
5903 
5904  size_t how_much_padding = 0;
5905  for (auto src_row_ind : src_row_inds) {
5906  if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) {
5907  // The target row does not have space for
5908  how_much_padding++;
5909  }
5910  }
5911  result = padding.insert (tgt_lid, how_much_padding);
5912  }
5913 
5914  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to fail even if
5915  // the user did nothing wrong. We should actually have a retry option. I
5916  // just copied this code over from computeCrsPadding.
5917  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5918  (result.failed(), std::runtime_error,
5919  "unable to insert padding for LID " << tgt_lid);
5920  }
5921  }
5922 
5923  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5924  void
5925  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5926  computeCrsPaddingForPermutedIDs (Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>& padding,
5927  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5928  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
5929  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
5930  const bool padAll) const
5931  {
5932  using LO = LocalOrdinal;
5933  using GO = GlobalOrdinal;
5934  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds: ";
5935  Kokkos::fence ();
5936 
5937  const map_type& src_row_map = * (source.getRowMap ());
5938 
5939  using insert_result =
5940  typename Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>::insert_result;
5941  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5942  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5943  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5944  const GO src_gid = src_row_map.getGlobalElement(permuteFromLIDs_h[i]);
5945  auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid);
5946 
5947  if (num_src_entries == 0)
5948  continue;
5949 
5950  insert_result result;
5951  const LO tgt_lid = permuteToLIDs_h[i];
5952  if (padAll)
5953  {
5954  result = padding.insert (tgt_lid, num_src_entries);
5955  }
5956  else {
5957  size_t check_row_length = 0;
5958  std::vector<GO> src_row_inds(num_src_entries);
5959  Teuchos::ArrayView<GO> src_row_inds_view(src_row_inds.data(), src_row_inds.size());
5960  source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length);
5961 
5962  const GO tgt_gid = rowMap_->getGlobalElement (tgt_lid);
5963  auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid);
5964  std::vector<GO> tgt_row_inds(num_tgt_entries);
5965  Teuchos::ArrayView<GO> tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size());
5966  this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length);
5967 
5968  size_t how_much_padding = 0;
5969  for (auto src_row_ind : src_row_inds) {
5970  if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) {
5971  // The target row does not have space for
5972  how_much_padding++;
5973  }
5974  }
5975  result = padding.insert (tgt_lid, how_much_padding);
5976  }
5977  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to
5978  // fail even if the user did nothing wrong. We should actually
5979  // have a retry option. I just copied this code over from
5980  // computeCrsPadding.
5981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5982  (result.failed(), std::runtime_error,
5983  "unable to insert padding for LID " << tgt_lid);
5984  }
5985 
5986  }
5987 
5988  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5989  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5990  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5991  computeCrsPadding (const Kokkos::DualView<const local_ordinal_type*,
5992  buffer_device_type>& importLIDs,
5993  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID) const
5994  {
5995  const char tfecfFuncName[] = "computeCrsPadding: ";
5996 
5997  // Creating padding for each new incoming index
5998  Kokkos::fence (); // Make sure device sees changes made by host
5999  using padding_type = Kokkos::UnorderedMap<local_ordinal_type, size_t, device_type>;
6000  padding_type padding (importLIDs.extent (0));
6001  auto numEnt = static_cast<size_t> (importLIDs.extent (0));
6002 
6003  auto importLIDs_h = importLIDs.view_host ();
6004  if (numPacketsPerLID.need_sync_host ()) {
6005  numPacketsPerLID.sync_host ();
6006  }
6007  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6008 
6009  // without unpacking the import/export buffer, we don't know how many of the
6010  // numPacketsPerLID[i] LIDs exist in the target. Below, it is assumed that
6011  // none do, and padding is requested for all.
6012  for (size_t i = 0; i < numEnt; ++i) {
6013  auto result = padding.insert (importLIDs_h[i], numPacketsPerLID_h[i]);
6014  // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingoverload.
6015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6016  (result.failed(), std::runtime_error,
6017  "unable to insert padding for LID " << importLIDs_h[i]);
6018  }
6019 
6020  TEUCHOS_TEST_FOR_EXCEPTION
6021  (padding.failed_insert(), std::runtime_error,
6022  "failed to insert one or more indices in to padding map");
6023  return padding;
6024  }
6025 
6026  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6027  void
6028  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6029 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6030  packAndPrepareNew
6031 #else // TPETRA_ENABLE_DEPRECATED_CODE
6032  packAndPrepare
6033 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6034  (const SrcDistObject& source,
6035  const Kokkos::DualView<const local_ordinal_type*,
6036  buffer_device_type>& exportLIDs,
6037  Kokkos::DualView<packet_type*,
6038  buffer_device_type>& exports,
6039  Kokkos::DualView<size_t*,
6040  buffer_device_type> numPacketsPerLID,
6041  size_t& constantNumPackets,
6042  Distributor& distor)
6043  {
6045  using GO = global_ordinal_type;
6046  using std::endl;
6047  using crs_graph_type =
6048  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
6049  using row_graph_type =
6050  RowGraph<local_ordinal_type, global_ordinal_type, node_type>;
6051  const char tfecfFuncName[] = "packAndPrepare: ";
6052  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
6053 
6054  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6055  std::unique_ptr<std::string> prefix;
6056  if (debug) {
6057  std::ostringstream os;
6058  const int myRank = this->getMap ()->getComm ()->getRank ();
6059  os << "Proc " << myRank << ": Tpetra::CrsGraph::packAndPrepare: ";
6060  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6061  os << "Start" << endl;
6062  std::cerr << os.str ();
6063  }
6064 
6065  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6066  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
6067  std::runtime_error,
6068  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
6069  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
6070  << ".");
6071  const row_graph_type* srcRowGraphPtr =
6072  dynamic_cast<const row_graph_type*> (&source);
6073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6074  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
6075  "or Import operation to a CrsGraph must be a RowGraph with the same "
6076  "template parameters.");
6077  // We don't check whether src_graph has had fillComplete called,
6078  // because it doesn't matter whether the *source* graph has been
6079  // fillComplete'd. The target graph can not be fillComplete'd yet.
6080  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6081  (this->isFillComplete (), std::runtime_error,
6082  "The target graph of an Import or Export must not be fill complete.");
6083 
6084  const crs_graph_type* srcCrsGraphPtr =
6085  dynamic_cast<const crs_graph_type*> (&source);
6086 
6087  if (srcCrsGraphPtr == nullptr) {
6088  using Teuchos::ArrayView;
6089  using LO = local_ordinal_type;
6090 
6091  if (debug) {
6092  std::ostringstream os;
6093  os << *prefix << "Source is a RowGraph but not a CrsGraph" << endl;
6094  std::cerr << os.str ();
6095  }
6096  // RowGraph::pack serves the "old" DistObject interface. It
6097  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
6098  // entails deep-copying the exports buffer on output. RowGraph
6099  // is a convenience interface when not a CrsGraph, so we accept
6100  // the performance hit.
6101  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6102  auto exportLIDs_h = exportLIDs.view_host ();
6103  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6104  exportLIDs_h.extent (0));
6105  Teuchos::Array<GO> exports_a;
6106 
6107  numPacketsPerLID.clear_sync_state ();
6108  numPacketsPerLID.modify_host ();
6109  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6110  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6111  numPacketsPerLID_h.extent (0));
6112  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6113  constantNumPackets, distor);
6114  const size_t newSize = static_cast<size_t> (exports_a.size ());
6115  if (static_cast<size_t> (exports.extent (0)) != newSize) {
6116  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
6117  exports = exports_dv_type ("exports", newSize);
6118  }
6119  Kokkos::View<const packet_type*, Kokkos::HostSpace,
6120  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
6121  exports.clear_sync_state ();
6122  exports.modify_host ();
6123  Kokkos::deep_copy (exports.view_host (), exports_a_h);
6124  }
6125  // packCrsGraphNew requires a valid localGraph.
6126  else if (! getColMap ().is_null () &&
6127  (lclGraph_.row_map.extent (0) != 0 ||
6128  getRowMap ()->getNodeNumElements () == 0)) {
6129  if (debug) {
6130  std::ostringstream os;
6131  os << *prefix << "packCrsGraphNew path" << endl;
6132  std::cerr << os.str ();
6133  }
6134  using export_pids_type =
6135  Kokkos::DualView<const int*, buffer_device_type>;
6136  export_pids_type exportPIDs; // not filling it; needed for syntax
6137  using LO = local_ordinal_type;
6138  using NT = node_type;
6140  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
6141  exports, numPacketsPerLID,
6142  constantNumPackets, false, distor);
6143  }
6144  else {
6145  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
6146  constantNumPackets, distor);
6147  }
6148 
6149  if (debug) {
6150  std::ostringstream os;
6151  os << *prefix << "Done" << endl;
6152  std::cerr << os.str ();
6153  }
6154  }
6155 
6156  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6157  void
6159  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6160  Teuchos::Array<GlobalOrdinal>& exports,
6161  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6162  size_t& constantNumPackets,
6163  Distributor& distor) const
6164  {
6165  auto col_map = this->getColMap();
6166  // packCrsGraph requires a valid localGraph.
6167  if( !col_map.is_null() && (lclGraph_.row_map.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
6169  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
6170  exportLIDs, constantNumPackets, distor);
6171  }
6172  else {
6173  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
6174  constantNumPackets, distor);
6175  }
6176  }
6177 
6178  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6179  void
6181  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6182  Teuchos::Array<GlobalOrdinal>& exports,
6183  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6184  size_t& constantNumPackets,
6185  Distributor& /* distor */) const
6186  {
6187  typedef LocalOrdinal LO;
6188  typedef GlobalOrdinal GO;
6189  typedef typename Kokkos::View<size_t*,
6190  device_type>::HostMirror::execution_space host_execution_space;
6191  typedef typename device_type::execution_space device_execution_space;
6192  const char tfecfFuncName[] = "packFillActive: ";
6193  const bool debug = ::Tpetra::Details::Behavior::debug("CrsGraph::pack");
6194  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6195 
6196  const auto numExportLIDs = exportLIDs.size ();
6197  if (debug) {
6198  std::ostringstream os;
6199  os << "Proc " << myRank << ": CrsGraph::pack: numExportLIDs = "
6200  << numExportLIDs << std::endl;
6201  std::cerr << os.str ();
6202  }
6203  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6204  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
6205  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
6206  " = " << numPacketsPerLID.size () << ".");
6207 
6208  // We may be accessing UVM data on host below, so ensure that the
6209  // device is done accessing it.
6210  device_execution_space().fence ();
6211 
6212  const map_type& rowMap = * (this->getRowMap ());
6213  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6214  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6215  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6216  "This graph claims to be locally indexed, but its column Map is nullptr. "
6217  "This should never happen. Please report this bug to the Tpetra "
6218  "developers.");
6219 
6220  // We may pack different amounts of data for different rows.
6221  constantNumPackets = 0;
6222 
6223  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
6224  // it might be now, but we might as well be safe).
6225  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
6226  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
6227 
6228  // Count the total number of packets (column indices, in the case
6229  // of a CrsGraph) to pack. While doing so, set
6230  // numPacketsPerLID[i] to the number of entries owned by the
6231  // calling process in (local) row exportLIDs[i] of the graph, that
6232  // the caller wants us to send out.
6233  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
6234  size_t totalNumPackets = 0;
6235  size_t errCount = 0;
6236  // lambdas turn what they capture const, so we can't
6237  // atomic_add(&errCount,1). Instead, we need a View to modify.
6238  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
6239  host_device_type;
6240  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6241  constexpr size_t ONE = 1;
6242 
6243  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
6244  inputRange,
6245  [=] (const LO& i, size_t& curTotalNumPackets) {
6246  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6247  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6248  Kokkos::atomic_add (&errCountView(), ONE);
6249  numPacketsPerLID_raw[i] = 0;
6250  }
6251  else {
6252  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6253  numPacketsPerLID_raw[i] = numEnt;
6254  curTotalNumPackets += numEnt;
6255  }
6256  },
6257  totalNumPackets);
6258 
6259  if (debug) {
6260  std::ostringstream os;
6261  os << "Proc " << myRank << ": CrsGraph::pack: "
6262  << "totalNumPackets = " << totalNumPackets << std::endl;
6263  std::cerr << os.str ();
6264  }
6265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6266  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6267  "one or more errors! errCount = " << errCount
6268  << ", totalNumPackets = " << totalNumPackets << ".");
6269  errCount = 0;
6270 
6271  // Allocate space for all the column indices to pack.
6272  exports.resize (totalNumPackets);
6273 
6274  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6275  (! this->supportsRowViews (), std::logic_error,
6276  "this->supportsRowViews() returns false; this should never happen. "
6277  "Please report this bug to the Tpetra developers.");
6278 
6279  // Loop again over the rows to export, and pack rows of indices
6280  // into the output buffer.
6281 
6282  if (debug) {
6283  std::ostringstream os;
6284  os << "Proc " << myRank << ": CrsGraph::pack: pack into exports" << std::endl;
6285  std::cerr << os.str ();
6286  }
6287 
6288  // Teuchos::ArrayView may not be thread safe, or may not be
6289  // efficiently thread safe. Better to use the raw pointer.
6290  GO* const exports_raw = exports.getRawPtr ();
6291  errCount = 0;
6292  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
6293  inputRange,
6294  [=] (const LO& i, size_t& exportsOffset, const bool final) {
6295  const size_t curOffset = exportsOffset;
6296  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6297  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6298 
6299  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
6300  if (debug) {
6301  std::ostringstream os;
6302  os << "Proc " << myRank << ": INVALID rowInfo: "
6303  << "i = " << i << ", lclRow = " << exportLIDs_raw[i] << std::endl;
6304  std::cerr << os.str ();
6305  }
6306  Kokkos::atomic_add (&errCountView(), ONE);
6307  }
6308  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6309  if (debug) {
6310  std::ostringstream os;
6311  os << "Proc " << myRank << ": UH OH! For i=" << i << ", lclRow="
6312  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
6313  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6314  << ") > totalNumPackets (= " << totalNumPackets << ")."
6315  << std::endl;
6316  std::cerr << os.str ();
6317  }
6318  Kokkos::atomic_add (&errCountView(), ONE);
6319  }
6320  else {
6321  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6322  if (this->isLocallyIndexed ()) {
6323  const LO* lclColInds = nullptr;
6324  LO capacity = 0;
6325  const LO errCode =
6326  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6327  if (errCode == 0) {
6328  if (final) {
6329  for (LO k = 0; k < numEnt; ++k) {
6330  const LO lclColInd = lclColInds[k];
6331  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6332  // Pack it, even if it's wrong. Let the receiving
6333  // process deal with it. Otherwise, we'll miss out
6334  // on any correct data.
6335  exports_raw[curOffset + k] = gblColInd;
6336  } // for each entry in the row
6337  } // final pass?
6338  exportsOffset = curOffset + numEnt;
6339  }
6340  else { // error in getting local row view
6341  Kokkos::atomic_add (&errCountView(), ONE);
6342  }
6343  }
6344  else if (this->isGloballyIndexed ()) {
6345  const GO* gblColInds = nullptr;
6346  LO capacity = 0;
6347  const LO errCode =
6348  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6349  if (errCode == 0) {
6350  if (final) {
6351  for (LO k = 0; k < numEnt; ++k) {
6352  const GO gblColInd = gblColInds[k];
6353  // Pack it, even if it's wrong. Let the receiving
6354  // process deal with it. Otherwise, we'll miss out
6355  // on any correct data.
6356  exports_raw[curOffset + k] = gblColInd;
6357  } // for each entry in the row
6358  } // final pass?
6359  exportsOffset = curOffset + numEnt;
6360  }
6361  else { // error in getting global row view
6362  Kokkos::atomic_add (&errCountView(), ONE);
6363  }
6364  }
6365  // If neither globally nor locally indexed, then the graph
6366  // has no entries in this row (or indeed, in any row on this
6367  // process) to pack.
6368  }
6369  });
6370 
6371  // We may have accessed UVM data on host above, so ensure that the
6372  // device sees these changes.
6373  device_execution_space().fence ();
6374 
6375  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6376  (errCount != 0, std::logic_error, "Packing encountered "
6377  "one or more errors! errCount = " << errCount
6378  << ", totalNumPackets = " << totalNumPackets << ".");
6379  }
6380 
6381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6382  void
6383  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6384  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
6385  buffer_device_type>& exportLIDs,
6386  Kokkos::DualView<packet_type*,
6387  buffer_device_type>& exports,
6388  Kokkos::DualView<size_t*,
6389  buffer_device_type> numPacketsPerLID,
6390  size_t& constantNumPackets,
6391  Distributor& distor) const
6392  {
6393  using std::endl;
6394  using LO = local_ordinal_type;
6395  using GO = global_ordinal_type;
6396  using host_execution_space = typename Kokkos::View<size_t*,
6397  device_type>::HostMirror::execution_space;
6398  using host_device_type =
6399  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6400  using device_execution_space = typename device_type::execution_space;
6401  using exports_dv_type =
6402  Kokkos::DualView<packet_type*, buffer_device_type>;
6403  const char tfecfFuncName[] = "packFillActiveNew: ";
6404  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6405  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6406 
6407  std::unique_ptr<std::string> prefix;
6408  if (debug) {
6409  std::ostringstream os;
6410  os << "Proc " << myRank << ": Tpetra::CrsGraph::packFillActiveNew: ";
6411  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6412  os << "Start" << endl;
6413  std::cerr << os.str ();
6414  }
6415 
6416  const auto numExportLIDs = exportLIDs.extent (0);
6417  if (debug) {
6418  std::ostringstream os;
6419  os << *prefix << "numExportLIDs: " << numExportLIDs
6420  << ", numPacketsPerLID.extent(0): " << numPacketsPerLID.extent (0)
6421  << endl;
6422  std::cerr << os.str ();
6423  }
6424  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6425  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6426  "exportLIDs.extent(0) = " << numExportLIDs
6427  << " != numPacketsPerLID.extent(0) = "
6428  << numPacketsPerLID.extent (0) << ".");
6429  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6430  auto exportLIDs_h = exportLIDs.view_host ();
6431 
6432  // We may be accessing UVM data on host below, so ensure that the
6433  // device is done accessing it.
6434  device_execution_space().fence ();
6435 
6436  const map_type& rowMap = * (this->getRowMap ());
6437  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6439  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6440  "This graph claims to be locally indexed, but its column Map is nullptr. "
6441  "This should never happen. Please report this bug to the Tpetra "
6442  "developers.");
6443 
6444  // We may pack different amounts of data for different rows.
6445  constantNumPackets = 0;
6446 
6447  numPacketsPerLID.clear_sync_state ();
6448  numPacketsPerLID.modify_host ();
6449  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6450 
6451  // Count the total number of packets (column indices, in the case
6452  // of a CrsGraph) to pack. While doing so, set
6453  // numPacketsPerLID[i] to the number of entries owned by the
6454  // calling process in (local) row exportLIDs[i] of the graph, that
6455  // the caller wants us to send out.
6456  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6457  range_type inputRange (0, numExportLIDs);
6458  size_t totalNumPackets = 0;
6459  size_t errCount = 0;
6460  // lambdas turn what they capture const, so we can't
6461  // atomic_add(&errCount,1). Instead, we need a View to modify.
6462  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6463  constexpr size_t ONE = 1;
6464 
6465  if (debug) {
6466  std::ostringstream os;
6467  os << *prefix << "Compute totalNumPackets" << endl;
6468  std::cerr << os.str ();
6469  }
6470 
6471  Kokkos::parallel_reduce
6472  ("Tpetra::CrsGraph::pack: totalNumPackets",
6473  inputRange,
6474  [=] (const LO i, size_t& curTotalNumPackets) {
6475  const LO lclRow = exportLIDs_h[i];
6476  const GO gblRow = rowMap.getGlobalElement (lclRow);
6477  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6478  if (debug) {
6479  std::ostringstream os;
6480  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6481  << lclRow << " not in row Map on this process" << endl;
6482  std::cerr << os.str ();
6483  }
6484  Kokkos::atomic_add (&errCountView(), ONE);
6485  numPacketsPerLID_h(i) = 0;
6486  }
6487  else {
6488  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6489  numPacketsPerLID_h(i) = numEnt;
6490  curTotalNumPackets += numEnt;
6491  }
6492  },
6493  totalNumPackets);
6494 
6495  if (debug) {
6496  std::ostringstream os;
6497  os << *prefix << "totalNumPackets: " << totalNumPackets
6498  << ", errCount: " << errCount << endl;
6499  std::cerr << os.str ();
6500  }
6501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6502  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6503  "one or more errors! totalNumPackets: " << totalNumPackets
6504  << ", errCount: " << errCount << ".");
6505 
6506  // Allocate space for all the column indices to pack.
6507  if (static_cast<size_t> (exports.extent (0)) < totalNumPackets) {
6508  // FIXME (mfh 09 Apr 2019) Create without initializing.
6509  exports = exports_dv_type ("exports", totalNumPackets);
6510  }
6511 
6512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6513  (! this->supportsRowViews (), std::logic_error,
6514  "this->supportsRowViews() returns false; this should never happen. "
6515  "Please report this bug to the Tpetra developers.");
6516 
6517  // Loop again over the rows to export, and pack rows of indices
6518  // into the output buffer.
6519 
6520  if (debug) {
6521  std::ostringstream os;
6522  os << *prefix << "Pack into exports buffer" << endl;
6523  std::cerr << os.str ();
6524  }
6525 
6526  exports.clear_sync_state ();
6527  exports.modify_host ();
6528  auto exports_h = exports.view_host ();
6529 
6530  // The graph may store its data in UVM memory, so make sure that
6531  // any device kernels are done modifying the graph's data before
6532  // reading the data.
6533  device_execution_space().fence ();
6534 
6535  errCount = 0;
6536  Kokkos::parallel_scan
6537  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6538  inputRange,
6539  [=] (const LO i, size_t& exportsOffset, const bool final) {
6540  const size_t curOffset = exportsOffset;
6541  const LO lclRow = exportLIDs_h(i);
6542  const GO gblRow = rowMap.getGlobalElement (lclRow);
6543  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6544  if (debug) {
6545  std::ostringstream os;
6546  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6547  << lclRow << " not in row Map on this process" << endl;
6548  std::cerr << os.str ();
6549  }
6550  Kokkos::atomic_add (&errCountView(), ONE);
6551  return;
6552  }
6553 
6554  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6555  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6556  if (debug) {
6557  std::ostringstream os;
6558  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6559  << lclRow << ", gblRow=" << gblRow << ": invalid rowInfo"
6560  << endl;
6561  std::cerr << os.str ();
6562  }
6563  Kokkos::atomic_add (&errCountView(), ONE);
6564  return;
6565  }
6566 
6567  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6568  if (debug) {
6569  std::ostringstream os;
6570  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6571  << lclRow << ", gblRow=" << gblRow << ", curOffset "
6572  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6573  << ") > totalNumPackets (= " << totalNumPackets << ")."
6574  << endl;
6575  std::cerr << os.str ();
6576  }
6577  Kokkos::atomic_add (&errCountView(), ONE);
6578  return;
6579  }
6580 
6581  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6582  if (this->isLocallyIndexed ()) {
6583  const LO* lclColInds = nullptr;
6584  LO capacity = 0;
6585  const LO errCode =
6586  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6587  if (errCode == 0) {
6588  if (final) {
6589  for (LO k = 0; k < numEnt; ++k) {
6590  const LO lclColInd = lclColInds[k];
6591  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6592  // Pack it, even if it's wrong. Let the receiving
6593  // process deal with it. Otherwise, we'll miss out
6594  // on any correct data.
6595  exports_h(curOffset + k) = gblColInd;
6596  } // for each entry in the row
6597  } // final pass?
6598  exportsOffset = curOffset + numEnt;
6599  }
6600  else { // error in getting local row view
6601  if (debug) {
6602  std::ostringstream os;
6603  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6604  << lclRow << ", gblRow=" << gblRow << ": "
6605  "getLocalViewRawConst returned nonzero error code "
6606  << errCode << endl;
6607  std::cerr << os.str ();
6608  }
6609  Kokkos::atomic_add (&errCountView(), ONE);
6610  }
6611  }
6612  else if (this->isGloballyIndexed ()) {
6613  const GO* gblColInds = nullptr;
6614  LO capacity = 0;
6615  const LO errCode =
6616  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6617  if (errCode == 0) {
6618  if (final) {
6619  for (LO k = 0; k < numEnt; ++k) {
6620  const GO gblColInd = gblColInds[k];
6621  // Pack it, even if it's wrong. Let the receiving
6622  // process deal with it. Otherwise, we'll miss out
6623  // on any correct data.
6624  exports_h(curOffset + k) = gblColInd;
6625  } // for each entry in the row
6626  } // final pass?
6627  exportsOffset = curOffset + numEnt;
6628  }
6629  else { // error in getting global row view
6630  if (debug) {
6631  std::ostringstream os;
6632  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6633  << lclRow << ", gblRow=" << gblRow << ": "
6634  "getGlobalViewRawConst returned nonzero error code "
6635  << errCode << endl;
6636  std::cerr << os.str ();
6637  }
6638  Kokkos::atomic_add (&errCountView(), ONE);
6639  }
6640  }
6641  // If neither globally nor locally indexed, then the graph
6642  // has no entries in this row (or indeed, in any row on this
6643  // process) to pack.
6644  });
6645 
6646  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6647  // (errCount != 0, std::logic_error, "Packing encountered "
6648  // "one or more errors! errCount = " << errCount
6649  // << ", totalNumPackets = " << totalNumPackets << ".");
6650 
6651  if (debug) {
6652  std::ostringstream os;
6653  os << *prefix << "errCount = " << errCount << "; Done" << endl;
6654  std::cerr << os.str ();
6655  }
6656  }
6657 
6658  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6659  void
6660  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6661 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6662  unpackAndCombineNew
6663 #else // TPETRA_ENABLE_DEPRECATED_CODE
6664  unpackAndCombine
6665 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6666  (const Kokkos::DualView<const local_ordinal_type*,
6667  buffer_device_type>& importLIDs,
6668  Kokkos::DualView<packet_type*,
6669  buffer_device_type> imports,
6670  Kokkos::DualView<size_t*,
6671  buffer_device_type> numPacketsPerLID,
6672  const size_t /* constantNumPackets */,
6673  Distributor& /* distor */,
6674  const CombineMode /* combineMode */ )
6675  {
6676  using std::endl;
6677  using LO = local_ordinal_type;
6678  using GO = global_ordinal_type;
6679  const char tfecfFuncName[] = "unpackAndCombine: ";
6680  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6681 
6682  std::unique_ptr<std::string> prefix;
6683  if (debug) {
6684  std::ostringstream os;
6685  const int myRank = this->getMap ()->getComm ()->getRank ();
6686  os << "Proc " << myRank << ": Tpetra::CrsGraph::unpackAndCombine: ";
6687  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6688  os << endl;
6689  std::cerr << os.str ();
6690  }
6691 
6692  if (this->getProfileType () == StaticProfile) {
6693  auto padding = computeCrsPadding (importLIDs, numPacketsPerLID);
6694  applyCrsPadding(padding);
6695  }
6696  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6697  // reasonable meaning, whether or not the matrix is fill complete.
6698  // It's just more work to implement.
6699 
6700  // We are not checking the value of the CombineMode input
6701  // argument. For CrsGraph, we only support import/export
6702  // operations if fillComplete has not yet been called. Any
6703  // incoming column-indices are inserted into the target graph. In
6704  // this context, CombineMode values of ADD vs INSERT are
6705  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6706  // duplicate column-index is inserted, it will be compressed out
6707  // when fillComplete is called.
6708  //
6709  // Note: I think REPLACE means that an existing row is replaced by
6710  // the imported row, i.e., the existing indices are cleared. CGB,
6711  // 6/17/2010
6712 
6713  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6714  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6715  std::runtime_error, "importLIDs.extent(0) = "
6716  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6717  << numPacketsPerLID.extent (0) << ".");
6718  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6719  (isFillComplete (), std::runtime_error,
6720  "Import or Export operations are not allowed on the destination "
6721  "CrsGraph if it is fill complete.");
6722 
6723  const size_t numImportLIDs = static_cast<size_t> (importLIDs.extent (0));
6724  if (numPacketsPerLID.need_sync_host ()) {
6725  numPacketsPerLID.sync_host ();
6726  }
6727  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6728 
6729  // If we're inserting in local indices, let's pre-allocate
6730  Teuchos::Array<LO> lclColInds;
6731  if (this->isLocallyIndexed ()) {
6732  size_t maxNumInserts = 0;
6733  for (size_t i = 0; i < numImportLIDs; ++i) {
6734  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6735  }
6736  lclColInds.resize (maxNumInserts);
6737  }
6738 
6739  auto importLIDs_h = importLIDs.view_host ();
6740  if (imports.need_sync_host ()) {
6741  imports.sync_host ();
6742  }
6743  auto imports_h = imports.view_host ();
6744 
6745  const map_type& rowMap = * (this->rowMap_);
6746  size_t importsOffset = 0;
6747  for (size_t i = 0; i < numImportLIDs; ++i) {
6748  const LO lclRow = importLIDs_h[i];
6749  const GO gblRow = rowMap.getGlobalElement (lclRow);
6750  const LO numEnt = numPacketsPerLID_h[i];
6751  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6752  &imports_h[importsOffset];
6753  if (! this->isLocallyIndexed ()) {
6754  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6755  // This row is not in the row Map on the calling process.
6756  this->insertGlobalIndicesIntoNonownedRows (gblRow, gblColInds, numEnt);
6757  }
6758  else {
6759  this->insertGlobalIndicesFiltered (lclRow, gblColInds, numEnt);
6760  }
6761  }
6762  else {
6763  for (LO j = 0; j < numEnt; j++) {
6764  lclColInds[j] = this->colMap_->getLocalElement (gblColInds[j]);
6765  }
6766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6767  (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid (),
6768  std::runtime_error,
6769  "cannot insert into unowned rows if isLocallyIndexed().");
6770  this->insertLocalIndices (lclRow, numEnt, lclColInds.data ());
6771  }
6772  importsOffset += numEnt;
6773  }
6774 
6775 
6776  if (debug) {
6777  std::ostringstream os;
6778  os << *prefix << "Done" << endl;
6779  std::cerr << os.str ();
6780  }
6781  }
6782 
6783  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6784  void
6786  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6787  {
6788  using Teuchos::Comm;
6789  using Teuchos::null;
6790  using Teuchos::ParameterList;
6791  using Teuchos::RCP;
6792 
6793  // We'll set all the state "transactionally," so that this method
6794  // satisfies the strong exception guarantee. This object's state
6795  // won't be modified until the end of this method.
6796  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6797  RCP<import_type> importer;
6798  RCP<export_type> exporter;
6799 
6800  rowMap = newMap;
6801  RCP<const Comm<int> > newComm =
6802  (newMap.is_null ()) ? null : newMap->getComm ();
6803 
6804  if (! domainMap_.is_null ()) {
6805  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6806  // Common case: original domain and row Maps are identical.
6807  // In that case, we need only replace the original domain Map
6808  // with the new Map. This ensures that the new domain and row
6809  // Maps _stay_ identical.
6810  domainMap = newMap;
6811  } else {
6812  domainMap = domainMap_->replaceCommWithSubset (newComm);
6813  }
6814  }
6815  if (! rangeMap_.is_null ()) {
6816  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6817  // Common case: original range and row Maps are identical. In
6818  // that case, we need only replace the original range Map with
6819  // the new Map. This ensures that the new range and row Maps
6820  // _stay_ identical.
6821  rangeMap = newMap;
6822  } else {
6823  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6824  }
6825  }
6826  if (! colMap.is_null ()) {
6827  colMap = colMap_->replaceCommWithSubset (newComm);
6828  }
6829 
6830  // (Re)create the Export and / or Import if necessary.
6831  if (! newComm.is_null ()) {
6832  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6833  //
6834  // The operations below are collective on the new communicator.
6835  //
6836  // (Re)create the Export object if necessary. If I haven't
6837  // called fillComplete yet, I don't have a rangeMap, so I must
6838  // first check if the _original_ rangeMap is not null. Ditto
6839  // for the Import object and the domain Map.
6840  if (! rangeMap_.is_null () &&
6841  rangeMap != rowMap &&
6842  ! rangeMap->isSameAs (*rowMap)) {
6843  if (params.is_null () || ! params->isSublist ("Export")) {
6844  exporter = rcp (new export_type (rowMap, rangeMap));
6845  }
6846  else {
6847  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6848  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6849  }
6850  }
6851  // (Re)create the Import object if necessary.
6852  if (! domainMap_.is_null () &&
6853  domainMap != colMap &&
6854  ! domainMap->isSameAs (*colMap)) {
6855  if (params.is_null () || ! params->isSublist ("Import")) {
6856  importer = rcp (new import_type (domainMap, colMap));
6857  } else {
6858  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6859  importer = rcp (new import_type (domainMap, colMap, importSublist));
6860  }
6861  }
6862  } // if newComm is not null
6863 
6864  // Defer side effects until the end. If no destructors throw
6865  // exceptions (they shouldn't anyway), then this method satisfies
6866  // the strong exception guarantee.
6867  exporter_ = exporter;
6868  importer_ = importer;
6869  rowMap_ = rowMap;
6870  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6871  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6872  // the same object. We might want to get rid of this redundant
6873  // pointer sometime, but for now, we'll leave it alone and just
6874  // set map_ to the same object.
6875  this->map_ = rowMap;
6876  domainMap_ = domainMap;
6877  rangeMap_ = rangeMap;
6878  colMap_ = colMap;
6879  }
6880 
6881  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6882  void
6884  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6885  {
6886  typedef LocalOrdinal LO;
6887  typedef GlobalOrdinal GO;
6888  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6889  const bool debug = ::Tpetra::Details::Behavior::debug ();
6890 
6891  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6892  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6893  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6894  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6895  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6896  std::invalid_argument, "offsets.extent(0) = " <<
6897  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6898 
6899  const map_type& rowMap = * (this->getRowMap ());
6900  const map_type& colMap = * (this->getColMap ());
6901 
6902  // We only use these in debug mode, but since debug mode is a
6903  // run-time option, they need to exist here. That's why we create
6904  // the vector with explicit size zero, to avoid overhead if debug
6905  // mode is off.
6906  bool allRowMapDiagEntriesInColMap = true;
6907  bool allDiagEntriesFound = true;
6908  bool allOffsetsCorrect = true;
6909  bool noOtherWeirdness = true;
6910  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6911  wrong_offsets_type wrongOffsets (0);
6912 
6913  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6914  // the subset of Map functionality that we need below.
6915  auto lclRowMap = rowMap.getLocalMap ();
6916  auto lclColMap = colMap.getLocalMap ();
6917 
6918  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6919  // setup, at least on the host. For CUDA, we have to use LocalMap
6920  // (that comes from each of the two Maps).
6921 
6922  const bool sorted = this->isSorted ();
6923  if (isFillComplete ()) {
6924  auto lclGraph = this->getLocalGraph ();
6925  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6926  lclGraph.row_map,
6927  lclGraph.entries, sorted);
6928  }
6929  else {
6930  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6931  // since the graph is not fill complete. The previous version
6932  // of this code assumed UVM; this version does not.
6933  auto offsets_h = Kokkos::create_mirror_view (offsets);
6934 
6935  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6936  // Find the diagonal entry. Since the row Map and column Map
6937  // may differ, we have to compare global row and column
6938  // indices, not local.
6939  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6940  const GO gblColInd = gblRowInd;
6941  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6942 
6943  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6944  allRowMapDiagEntriesInColMap = false;
6945  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6946  }
6947  else {
6948  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6949  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6950  rowInfo.numEntries > 0) {
6951 
6952  auto colInds = this->getLocalKokkosRowView (rowInfo);
6953  const size_t hint = 0; // not needed for this algorithm
6954  const size_t offset =
6955  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6956  lclColInd, hint, sorted);
6957  offsets_h(lclRowInd) = offset;
6958 
6959  if (debug) {
6960  // Now that we have what we think is an offset, make sure
6961  // that it really does point to the diagonal entry. Offsets
6962  // are _relative_ to each row, not absolute (for the whole
6963  // (local) graph).
6964  Teuchos::ArrayView<const LO> lclColInds;
6965  try {
6966  this->getLocalRowView (lclRowInd, lclColInds);
6967  }
6968  catch (...) {
6969  noOtherWeirdness = false;
6970  }
6971  // Don't continue with error checking if the above failed.
6972  if (noOtherWeirdness) {
6973  const size_t numEnt = lclColInds.size ();
6974  if (offset >= numEnt) {
6975  // Offsets are relative to each row, so this means that
6976  // the offset is out of bounds.
6977  allOffsetsCorrect = false;
6978  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6979  } else {
6980  const LO actualLclColInd = lclColInds[offset];
6981  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6982  if (actualGblColInd != gblColInd) {
6983  allOffsetsCorrect = false;
6984  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6985  }
6986  }
6987  }
6988  } // debug
6989  }
6990  else { // either row is empty, or something went wrong w/ getRowInfo()
6991  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6992  allDiagEntriesFound = false;
6993  }
6994  } // whether lclColInd is a valid local column index
6995  } // for each local row
6996 
6997  Kokkos::deep_copy (offsets, offsets_h);
6998  } // whether the graph is fill complete
6999 
7000  if (debug) {
7001  if (wrongOffsets.size () != 0) {
7002  std::ostringstream os;
7003  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
7004  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
7005  os << "(" << wrongOffsets[k].first << ","
7006  << wrongOffsets[k].second << ")";
7007  if (k + 1 < wrongOffsets.size ()) {
7008  os << ", ";
7009  }
7010  }
7011  os << "]" << std::endl;
7012  std::cerr << os.str ();
7013  }
7014  } // debug
7015 
7016  if (debug) {
7017  using Teuchos::reduceAll;
7018  using std::endl;
7019  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
7020  const bool localSuccess =
7021  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
7022  const int numResults = 5;
7023  int lclResults[5];
7024  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
7025  lclResults[1] = allDiagEntriesFound ? 1 : 0;
7026  lclResults[2] = allOffsetsCorrect ? 1 : 0;
7027  lclResults[3] = noOtherWeirdness ? 1 : 0;
7028  // min-all-reduce will compute least rank of all the processes
7029  // that didn't succeed.
7030  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
7031 
7032  int gblResults[5];
7033  gblResults[0] = 0;
7034  gblResults[1] = 0;
7035  gblResults[2] = 0;
7036  gblResults[3] = 0;
7037  gblResults[4] = 0;
7038  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
7039  numResults, lclResults, gblResults);
7040 
7041  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
7042  || gblResults[3] != 1) {
7043  std::ostringstream os; // build error message
7044  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
7045  "possibly among others): " << endl;
7046  if (gblResults[0] == 0) {
7047  os << " - The column Map does not contain at least one diagonal entry "
7048  "of the graph." << endl;
7049  }
7050  if (gblResults[1] == 0) {
7051  os << " - On one or more processes, some row does not contain a "
7052  "diagonal entry." << endl;
7053  }
7054  if (gblResults[2] == 0) {
7055  os << " - On one or more processes, some offsets are incorrect."
7056  << endl;
7057  }
7058  if (gblResults[3] == 0) {
7059  os << " - One or more processes had some other error."
7060  << endl;
7061  }
7062  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
7063  }
7064  } // debug
7065  }
7066 
7067  namespace { // (anonymous)
7068 
7069  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
7070  // below). The point is to avoid the deep copy between the input
7071  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
7072  // can't use UVM to avoid the deep copy with CUDA, because the
7073  // ArrayRCP is a host pointer, while the input to the graph's
7074  // getLocalDiagOffsets method is a device pointer. Assigning a
7075  // host pointer to a device pointer is incorrect unless the host
7076  // pointer points to host pinned memory. The goal is to get rid
7077  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
7078  // copy for backwards compatibility.
7079  //
7080  // We have to use template magic because
7081  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
7082  // if device_type::memory_space is not Kokkos::HostSpace (as is
7083  // the case with CUDA).
7084 
7085  template<class DeviceType,
7086  const bool memSpaceIsHostSpace =
7087  std::is_same<typename DeviceType::memory_space,
7088  Kokkos::HostSpace>::value>
7089  struct HelpGetLocalDiagOffsets {};
7090 
7091  template<class DeviceType>
7092  struct HelpGetLocalDiagOffsets<DeviceType, true> {
7093  typedef DeviceType device_type;
7094  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7095  Kokkos::MemoryUnmanaged> device_offsets_type;
7096  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7097  Kokkos::MemoryUnmanaged> host_offsets_type;
7098 
7099  static device_offsets_type
7100  getDeviceOffsets (const host_offsets_type& hostOffsets)
7101  {
7102  // Host and device are the same; no need to allocate a
7103  // temporary device View.
7104  return hostOffsets;
7105  }
7106 
7107  static void
7108  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
7109  const device_offsets_type& /* deviceOffsets */)
7110  { /* copy back not needed; host and device are the same */ }
7111  };
7112 
7113  template<class DeviceType>
7114  struct HelpGetLocalDiagOffsets<DeviceType, false> {
7115  typedef DeviceType device_type;
7116  // We have to do a deep copy, since host memory space != device
7117  // memory space. Thus, the device View is managed (we need to
7118  // allocate a temporary device View).
7119  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
7120  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7121  Kokkos::MemoryUnmanaged> host_offsets_type;
7122 
7123  static device_offsets_type
7124  getDeviceOffsets (const host_offsets_type& hostOffsets)
7125  {
7126  // Host memory space != device memory space, so we must
7127  // allocate a temporary device View for the graph.
7128  return device_offsets_type ("offsets", hostOffsets.extent (0));
7129  }
7130 
7131  static void
7132  copyBackIfNeeded (const host_offsets_type& hostOffsets,
7133  const device_offsets_type& deviceOffsets)
7134  {
7135  Kokkos::deep_copy (hostOffsets, deviceOffsets);
7136  }
7137  };
7138  } // namespace (anonymous)
7139 
7140 
7141  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7142  void
7144  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
7145  {
7146  typedef LocalOrdinal LO;
7147  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
7148  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7149  (! this->hasColMap (), std::runtime_error,
7150  "The graph does not yet have a column Map.");
7151  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
7152  if (static_cast<LO> (offsets.size ()) != myNumRows) {
7153  // NOTE (mfh 21 Jan 2016) This means that the method does not
7154  // satisfy the strong exception guarantee (no side effects
7155  // unless successful).
7156  offsets.resize (myNumRows);
7157  }
7158 
7159  // mfh 21 Jan 2016: This method unfortunately takes a
7160  // Teuchos::ArrayRCP, which is host memory. The graph wants a
7161  // device pointer. We can't access host memory from the device;
7162  // that's the wrong direction for UVM. (It's the right direction
7163  // for inefficient host pinned memory, but we don't want to use
7164  // that here.) Thus, if device memory space != host memory space,
7165  // we allocate and use a temporary device View to get the offsets.
7166  // If the two spaces are equal, the template magic makes the deep
7167  // copy go away.
7168  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
7169  typedef typename helper_type::host_offsets_type host_offsets_type;
7170  // Unmanaged host View that views the output array.
7171  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
7172  // Allocate temp device View if host != device, else reuse host array.
7173  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
7174  // NOT recursion; this calls the overload that takes a device View.
7175  this->getLocalDiagOffsets (deviceOffsets);
7176  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
7177  }
7178 
7179  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7180  bool
7183  return true;
7184  }
7185 
7186  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7187  void
7190  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7191  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7192  const Teuchos::RCP<const map_type>& domainMap,
7193  const Teuchos::RCP<const map_type>& rangeMap,
7194  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7195  {
7200  using Teuchos::ArrayRCP;
7201  using Teuchos::ArrayView;
7202  using Teuchos::Comm;
7203  using Teuchos::ParameterList;
7204  using Teuchos::rcp;
7205  using Teuchos::RCP;
7206 #ifdef HAVE_TPETRA_MMM_TIMINGS
7207  using std::string;
7208  using Teuchos::TimeMonitor;
7209 #endif
7210 
7211  using LO = LocalOrdinal;
7212  using GO = GlobalOrdinal;
7213  using NT = node_type;
7214  using this_type = CrsGraph<LO, GO, NT>;
7215  using ivector_type = Vector<int, LO, GO, NT>;
7216  using packet_type = typename this_type::packet_type;
7217 
7218  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
7219 
7220 #ifdef HAVE_TPETRA_MMM_TIMINGS
7221  string label;
7222  if(!params.is_null()) label = params->get("Timer Label", label);
7223  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
7224  RCP<TimeMonitor> MM =
7225  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
7226 #endif
7227 
7228  // Make sure that the input argument rowTransfer is either an
7229  // Import or an Export. Import and Export are the only two
7230  // subclasses of Transfer that we defined, but users might
7231  // (unwisely, for now at least) decide to implement their own
7232  // subclasses. Exclude this possibility.
7233  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
7234  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
7235  TEUCHOS_TEST_FOR_EXCEPTION(
7236  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
7237  prefix << "The 'rowTransfer' input argument must be either an Import or "
7238  "an Export, and its template parameters must match the corresponding "
7239  "template parameters of the CrsGraph.");
7240 
7241  // Make sure that the input argument domainTransfer is either an
7242  // Import or an Export. Import and Export are the only two
7243  // subclasses of Transfer that we defined, but users might
7244  // (unwisely, for now at least) decide to implement their own
7245  // subclasses. Exclude this possibility.
7246  Teuchos::RCP<const import_type> xferDomainAsImport =
7247  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7248  Teuchos::RCP<const export_type> xferDomainAsExport =
7249  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7250 
7251  if(! domainTransfer.is_null()) {
7252 
7253  TEUCHOS_TEST_FOR_EXCEPTION(
7254  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7255  prefix << "The 'domainTransfer' input argument must be either an "
7256  "Import or an Export, and its template parameters must match the "
7257  "corresponding template parameters of the CrsGraph.");
7258 
7259  TEUCHOS_TEST_FOR_EXCEPTION(
7260  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
7261  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
7262  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7263  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7264  "must be of the same type (either Import or Export).");
7265 
7266  TEUCHOS_TEST_FOR_EXCEPTION(
7267  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
7268  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
7269  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7270  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7271  "must be of the same type (either Import or Export).");
7272 
7273  } // domainTransfer != null
7274 
7275 
7276  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
7277  // if the source Map is not distributed but the target Map is?
7278  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7279 
7280  //
7281  // Get the caller's parameters
7282  //
7283 
7284  bool reverseMode = false; // Are we in reverse mode?
7285  bool restrictComm = false; // Do we need to restrict the communicator?
7286  RCP<ParameterList> graphparams; // parameters for the destination graph
7287  if (! params.is_null()) {
7288  reverseMode = params->get("Reverse Mode", reverseMode);
7289  restrictComm = params->get("Restrict Communicator", restrictComm);
7290  graphparams = sublist(params, "CrsGraph");
7291  }
7292 
7293  // Get the new domain and range Maps. We need some of them for error
7294  // checking, now that we have the reverseMode parameter.
7295  RCP<const map_type> MyRowMap = reverseMode ?
7296  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7297  RCP<const map_type> MyColMap; // create this below
7298  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
7299  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
7300  RCP<const map_type> BaseRowMap = MyRowMap;
7301  RCP<const map_type> BaseDomainMap = MyDomainMap;
7302 
7303  // If the user gave us a nonnull destGraph, then check whether it's
7304  // "pristine." That means that it has no entries.
7305  //
7306  // FIXME (mfh 15 May 2014) If this is not true on all processes,
7307  // then this exception test may hang. It would be better to
7308  // forward an error flag to the next communication phase.
7309  if (! destGraph.is_null()) {
7310  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
7311  // whether a graph or matrix has no entries on the calling
7312  // process, is that it is neither locally nor globally indexed.
7313  // This may change eventually with the Kokkos refactor version
7314  // of Tpetra, so it would be better just to check the quantity
7315  // of interest directly. Note that with the Kokkos refactor
7316  // version of Tpetra, asking for the total number of entries in
7317  // a graph or matrix that is not fill complete might require
7318  // computation (kernel launch), since it is not thread scalable
7319  // to update a count every time an entry is inserted.
7320  const bool NewFlag =
7321  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7322  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7323  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
7324  "if its graph is empty (neither locally nor globally indexed).");
7325 
7326  // FIXME (mfh 15 May 2014) At some point, we want to change
7327  // graphs and matrices so that their DistObject Map
7328  // (this->getMap()) may differ from their row Map. This will
7329  // make redistribution for 2-D distributions more efficient. I
7330  // hesitate to change this check, because I'm not sure how much
7331  // the code here depends on getMap() and getRowMap() being the
7332  // same.
7333  TEUCHOS_TEST_FOR_EXCEPTION(
7334  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7335  prefix << "The (row) Map of the input argument 'destGraph' is not the "
7336  "same as the (row) Map specified by the input argument 'rowTransfer'.");
7337 
7338  TEUCHOS_TEST_FOR_EXCEPTION(
7339  ! destGraph->checkSizes(*this), std::invalid_argument,
7340  prefix << "You provided a nonnull destination graph, but checkSizes() "
7341  "indicates that it is not a legal legal target for redistribution from "
7342  "the source graph (*this). This may mean that they do not have the "
7343  "same dimensions.");
7344  }
7345 
7346  // If forward mode (the default), then *this's (row) Map must be
7347  // the same as the source Map of the Transfer. If reverse mode,
7348  // then *this's (row) Map must be the same as the target Map of
7349  // the Transfer.
7350  //
7351  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7352  // and matrices so that their DistObject Map (this->getMap()) may
7353  // differ from their row Map. This will make redistribution for
7354  // 2-D distributions more efficient. I hesitate to change this
7355  // check, because I'm not sure how much the code here depends on
7356  // getMap() and getRowMap() being the same.
7357  TEUCHOS_TEST_FOR_EXCEPTION(
7358  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7359  std::invalid_argument, prefix <<
7360  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7361 
7362  TEUCHOS_TEST_FOR_EXCEPTION(
7363  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7364  std::invalid_argument, prefix <<
7365  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7366 
7367  // checks for domainTransfer
7368  TEUCHOS_TEST_FOR_EXCEPTION(
7369  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7370  std::invalid_argument,
7371  prefix << "The target map of the 'domainTransfer' input argument must be "
7372  "the same as the rebalanced domain map 'domainMap'");
7373 
7374  TEUCHOS_TEST_FOR_EXCEPTION(
7375  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7376  std::invalid_argument,
7377  prefix << "The source map of the 'domainTransfer' input argument must be "
7378  "the same as the rebalanced domain map 'domainMap'");
7379 
7380  // The basic algorithm here is:
7381  //
7382  // 1. Call the moral equivalent of "distor.do" to handle the import.
7383  // 2. Copy all the Imported and Copy/Permuted data into the raw
7384  // CrsGraph pointers, still using GIDs.
7385  // 3. Call an optimized version of MakeColMap that avoids the
7386  // Directory lookups (since the importer knows who owns all the
7387  // GIDs) AND reindexes to LIDs.
7388  // 4. Call expertStaticFillComplete()
7389 
7390  // Get information from the Importer
7391  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7392  ArrayView<const LO> ExportLIDs = reverseMode ?
7393  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7394  ArrayView<const LO> RemoteLIDs = reverseMode ?
7395  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7396  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7397  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7398  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7399  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7400  Distributor& Distor = rowTransfer.getDistributor();
7401 
7402  // Owning PIDs
7403  Teuchos::Array<int> SourcePids;
7404  Teuchos::Array<int> TargetPids;
7405  int MyPID = getComm()->getRank();
7406 
7407  // Temp variables for sub-communicators
7408  RCP<const map_type> ReducedRowMap, ReducedColMap,
7409  ReducedDomainMap, ReducedRangeMap;
7410  RCP<const Comm<int> > ReducedComm;
7411 
7412  // If the user gave us a null destGraph, then construct the new
7413  // destination graph. We will replace its column Map later.
7414  if (destGraph.is_null()) {
7415  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7416  }
7417 
7418  /***************************************************/
7419  /***** 1) First communicator restriction phase ****/
7420  /***************************************************/
7421  if (restrictComm) {
7422  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7423  ReducedComm = ReducedRowMap.is_null() ?
7424  Teuchos::null :
7425  ReducedRowMap->getComm();
7426  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7427 
7428  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7429  ReducedRowMap :
7430  MyDomainMap->replaceCommWithSubset(ReducedComm);
7431  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7432  ReducedRowMap :
7433  MyRangeMap->replaceCommWithSubset(ReducedComm);
7434 
7435  // Reset the "my" maps
7436  MyRowMap = ReducedRowMap;
7437  MyDomainMap = ReducedDomainMap;
7438  MyRangeMap = ReducedRangeMap;
7439 
7440  // Update my PID, if we've restricted the communicator
7441  if (! ReducedComm.is_null()) {
7442  MyPID = ReducedComm->getRank();
7443  }
7444  else {
7445  MyPID = -2; // For debugging
7446  }
7447  }
7448  else {
7449  ReducedComm = MyRowMap->getComm();
7450  }
7451 
7452  /***************************************************/
7453  /***** 2) From Tpetra::DistObject::doTransfer() ****/
7454  /***************************************************/
7455 #ifdef HAVE_TPETRA_MMM_TIMINGS
7456  MM = Teuchos::null;
7457  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7458 #endif
7459  // Get the owning PIDs
7460  RCP<const import_type> MyImporter = getImporter();
7461 
7462  // check whether domain maps of source graph and base domain map is the same
7463  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7464 
7465  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7466  // Same domain map as source graph
7467  //
7468  // NOTE: This won't work for restrictComm (because the Import
7469  // doesn't know the restricted PIDs), though writing an
7470  // optimized version for that case would be easy (Import an
7471  // IntVector of the new PIDs). Might want to add this later.
7472  Import_Util::getPids(*MyImporter, SourcePids, false);
7473  }
7474  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7475  // Same domain map as source graph (restricted communicator)
7476  // We need one import from the domain to the column map
7477  ivector_type SourceDomain_pids(getDomainMap(),true);
7478  ivector_type SourceCol_pids(getColMap());
7479  // SourceDomain_pids contains the restricted pids
7480  SourceDomain_pids.putScalar(MyPID);
7481 
7482  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7483  SourcePids.resize(getColMap()->getNodeNumElements());
7484  SourceCol_pids.get1dCopy(SourcePids());
7485  }
7486  else if (MyImporter.is_null() && bSameDomainMap) {
7487  // Graph has no off-process entries
7488  SourcePids.resize(getColMap()->getNodeNumElements());
7489  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7490  }
7491  else if ( ! MyImporter.is_null() &&
7492  ! domainTransfer.is_null() ) {
7493  // general implementation for rectangular matrices with
7494  // domain map different than SourceGraph domain map.
7495  // User has to provide a DomainTransfer object. We need
7496  // two communications (import/export)
7497 
7498  // TargetDomain_pids lives on the rebalanced new domain map
7499  ivector_type TargetDomain_pids(domainMap);
7500  TargetDomain_pids.putScalar(MyPID);
7501 
7502  // SourceDomain_pids lives on the non-rebalanced old domain map
7503  ivector_type SourceDomain_pids(getDomainMap());
7504 
7505  // SourceCol_pids lives on the non-rebalanced old column map
7506  ivector_type SourceCol_pids(getColMap());
7507 
7508  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7509  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7510  }
7511  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7512  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7513  }
7514  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7515  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7516  }
7517  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7518  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7519  }
7520  else {
7521  TEUCHOS_TEST_FOR_EXCEPTION(
7522  true, std::logic_error,
7523  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7524  }
7525  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7526  SourcePids.resize(getColMap()->getNodeNumElements());
7527  SourceCol_pids.get1dCopy(SourcePids());
7528  }
7529  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7530  getDomainMap()->isSameAs(*getRowMap())) {
7531  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7532  ivector_type TargetRow_pids(domainMap);
7533  ivector_type SourceRow_pids(getRowMap());
7534  ivector_type SourceCol_pids(getColMap());
7535 
7536  TargetRow_pids.putScalar(MyPID);
7537  if (! reverseMode && xferAsImport != nullptr) {
7538  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7539  }
7540  else if (reverseMode && xferAsExport != nullptr) {
7541  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7542  }
7543  else if (! reverseMode && xferAsExport != nullptr) {
7544  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7545  }
7546  else if (reverseMode && xferAsImport != nullptr) {
7547  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7548  }
7549  else {
7550  TEUCHOS_TEST_FOR_EXCEPTION(
7551  true, std::logic_error,
7552  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7553  }
7554  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7555  SourcePids.resize(getColMap()->getNodeNumElements());
7556  SourceCol_pids.get1dCopy(SourcePids());
7557  }
7558  else {
7559  TEUCHOS_TEST_FOR_EXCEPTION(
7560  true, std::invalid_argument,
7561  prefix << "This method only allows either domainMap == getDomainMap(), "
7562  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7563  }
7564 
7565  // Tpetra-specific stuff
7566  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7567  if (constantNumPackets == 0) {
7568  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7569  RemoteLIDs.size());
7570  }
7571  else {
7572  // There are a constant number of packets per element. We
7573  // already know (from the number of "remote" (incoming)
7574  // elements) how many incoming elements we expect, so we can
7575  // resize the buffer accordingly.
7576  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7577  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7578  }
7579 
7580  {
7581  // packAndPrepare* methods modify numExportPacketsPerLID_.
7582  destGraph->numExportPacketsPerLID_.modify_host();
7583  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7584  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7585 
7586  // Pack & Prepare w/ owning PIDs
7587  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7588  numExportPacketsPerLID, ExportLIDs,
7589  SourcePids, constantNumPackets, Distor);
7590  }
7591 
7592  // Do the exchange of remote data.
7593 #ifdef HAVE_TPETRA_MMM_TIMINGS
7594  MM = Teuchos::null;
7595  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7596 #endif
7597 
7598  if (communication_needed) {
7599  if (reverseMode) {
7600  if (constantNumPackets == 0) { // variable number of packets per LID
7601  // Make sure that host has the latest version, since we're
7602  // using the version on host. If host has the latest
7603  // version, syncing to host does nothing.
7604  destGraph->numExportPacketsPerLID_.sync_host();
7605  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7606  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7607  destGraph->numImportPacketsPerLID_.sync_host();
7608  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7609  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7610  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7611  numImportPacketsPerLID);
7612  size_t totalImportPackets = 0;
7613  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7614  totalImportPackets += numImportPacketsPerLID[i];
7615  }
7616 
7617  // Reallocation MUST go before setting the modified flag,
7618  // because it may clear out the flags.
7619  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7620  destGraph->imports_.modify_host();
7621  Teuchos::ArrayView<packet_type> hostImports =
7622  getArrayViewFromDualView(destGraph->imports_);
7623  // This is a legacy host pack/unpack path, so use the host
7624  // version of exports_.
7625  destGraph->exports_.sync_host();
7626  Teuchos::ArrayView<const packet_type> hostExports =
7627  getArrayViewFromDualView(destGraph->exports_);
7628  Distor.doReversePostsAndWaits(hostExports,
7629  numExportPacketsPerLID,
7630  hostImports,
7631  numImportPacketsPerLID);
7632  }
7633  else { // constant number of packets per LI
7634  destGraph->imports_.modify_host();
7635  Teuchos::ArrayView<packet_type> hostImports =
7636  getArrayViewFromDualView(destGraph->imports_);
7637  // This is a legacy host pack/unpack path, so use the host
7638  // version of exports_.
7639  destGraph->exports_.sync_host();
7640  Teuchos::ArrayView<const packet_type> hostExports =
7641  getArrayViewFromDualView(destGraph->exports_);
7642  Distor.doReversePostsAndWaits(hostExports,
7643  constantNumPackets,
7644  hostImports);
7645  }
7646  }
7647  else { // forward mode (the default)
7648  if (constantNumPackets == 0) { // variable number of packets per LID
7649  // Make sure that host has the latest version, since we're
7650  // using the version on host. If host has the latest
7651  // version, syncing to host does nothing.
7652  destGraph->numExportPacketsPerLID_.sync_host();
7653  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7654  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7655  destGraph->numImportPacketsPerLID_.sync_host();
7656  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7657  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7658  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7659  numImportPacketsPerLID);
7660  size_t totalImportPackets = 0;
7661  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7662  totalImportPackets += numImportPacketsPerLID[i];
7663  }
7664 
7665  // Reallocation MUST go before setting the modified flag,
7666  // because it may clear out the flags.
7667  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7668  destGraph->imports_.modify_host();
7669  Teuchos::ArrayView<packet_type> hostImports =
7670  getArrayViewFromDualView(destGraph->imports_);
7671  // This is a legacy host pack/unpack path, so use the host
7672  // version of exports_.
7673  destGraph->exports_.sync_host();
7674  Teuchos::ArrayView<const packet_type> hostExports =
7675  getArrayViewFromDualView(destGraph->exports_);
7676  Distor.doPostsAndWaits(hostExports,
7677  numExportPacketsPerLID,
7678  hostImports,
7679  numImportPacketsPerLID);
7680  }
7681  else { // constant number of packets per LID
7682  destGraph->imports_.modify_host();
7683  Teuchos::ArrayView<packet_type> hostImports =
7684  getArrayViewFromDualView(destGraph->imports_);
7685  // This is a legacy host pack/unpack path, so use the host
7686  // version of exports_.
7687  destGraph->exports_.sync_host();
7688  Teuchos::ArrayView<const packet_type> hostExports =
7689  getArrayViewFromDualView(destGraph->exports_);
7690  Distor.doPostsAndWaits(hostExports,
7691  constantNumPackets,
7692  hostImports);
7693  }
7694  }
7695  }
7696 
7697  /*********************************************************************/
7698  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7699  /*********************************************************************/
7700 
7701 #ifdef HAVE_TPETRA_MMM_TIMINGS
7702  MM = Teuchos::null;
7703  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7704 #endif
7705 
7706  // Backwards compatibility measure. We'll use this again below.
7707  destGraph->numImportPacketsPerLID_.sync_host();
7708  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7709  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7710  destGraph->imports_.sync_host();
7711  Teuchos::ArrayView<const packet_type> hostImports =
7712  getArrayViewFromDualView(destGraph->imports_);
7713  size_t mynnz =
7714  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7715  numImportPacketsPerLID,
7716  constantNumPackets, Distor, INSERT,
7717  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7718  size_t N = BaseRowMap->getNodeNumElements();
7719 
7720  // Allocations
7721  ArrayRCP<size_t> CSR_rowptr(N+1);
7722  ArrayRCP<GO> CSR_colind_GID;
7723  ArrayRCP<LO> CSR_colind_LID;
7724  CSR_colind_GID.resize(mynnz);
7725 
7726  // If LO and GO are the same, we can reuse memory when
7727  // converting the column indices from global to local indices.
7728  if (typeid(LO) == typeid(GO)) {
7729  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7730  }
7731  else {
7732  CSR_colind_LID.resize(mynnz);
7733  }
7734 
7735  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7736  // unpackAndCombine method on a "CrsArrays" object? This passing
7737  // in a huge list of arrays is icky. Can't we have a bit of an
7738  // abstraction? Implementing a concrete DistObject subclass only
7739  // takes five methods.
7740  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7741  numImportPacketsPerLID, constantNumPackets,
7742  Distor, INSERT, NumSameIDs, PermuteToLIDs,
7743  PermuteFromLIDs, N, mynnz, MyPID,
7744  CSR_rowptr(), CSR_colind_GID(),
7745  SourcePids(), TargetPids);
7746 
7747  /**************************************************************/
7748  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7749  /**************************************************************/
7750 #ifdef HAVE_TPETRA_MMM_TIMINGS
7751  MM = Teuchos::null;
7752  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7753 #endif
7754  // Call an optimized version of makeColMap that avoids the
7755  // Directory lookups (since the Import object knows who owns all
7756  // the GIDs).
7757  Teuchos::Array<int> RemotePids;
7758  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7759  CSR_colind_LID(),
7760  CSR_colind_GID(),
7761  BaseDomainMap,
7762  TargetPids, RemotePids,
7763  MyColMap);
7764 
7765  /*******************************************************/
7766  /**** 4) Second communicator restriction phase ****/
7767  /*******************************************************/
7768  if (restrictComm) {
7769  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7770  ReducedRowMap :
7771  MyColMap->replaceCommWithSubset(ReducedComm);
7772  MyColMap = ReducedColMap; // Reset the "my" maps
7773  }
7774 
7775  // Replace the col map
7776  destGraph->replaceColMap(MyColMap);
7777 
7778  // Short circuit if the processor is no longer in the communicator
7779  //
7780  // NOTE: Epetra modifies all "removed" processes so they
7781  // have a dummy (serial) Map that doesn't touch the original
7782  // communicator. Duplicating that here might be a good idea.
7783  if (ReducedComm.is_null()) {
7784  return;
7785  }
7786 
7787  /***************************************************/
7788  /**** 5) Sort ****/
7789  /***************************************************/
7790  if ((! reverseMode && xferAsImport != nullptr) ||
7791  (reverseMode && xferAsExport != nullptr)) {
7792  Import_Util::sortCrsEntries(CSR_rowptr(),
7793  CSR_colind_LID());
7794  }
7795  else if ((! reverseMode && xferAsExport != nullptr) ||
7796  (reverseMode && xferAsImport != nullptr)) {
7797  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7798  CSR_colind_LID());
7799  if (CSR_rowptr[N] != mynnz) {
7800  CSR_colind_LID.resize(CSR_rowptr[N]);
7801  }
7802  }
7803  else {
7804  TEUCHOS_TEST_FOR_EXCEPTION(
7805  true, std::logic_error,
7806  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7807  }
7808  /***************************************************/
7809  /**** 6) Reset the colmap and the arrays ****/
7810  /***************************************************/
7811 
7812  // Call constructor for the new graph (restricted as needed)
7813  //
7814  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7815 
7816  /***************************************************/
7817  /**** 7) Build Importer & Call ESFC ****/
7818  /***************************************************/
7819  // Pre-build the importer using the existing PIDs
7820  Teuchos::ParameterList esfc_params;
7821 #ifdef HAVE_TPETRA_MMM_TIMINGS
7822  MM = Teuchos::null;
7823  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7824 #endif
7825  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7826 #ifdef HAVE_TPETRA_MMM_TIMINGS
7827  MM = Teuchos::null;
7828  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7829 
7830  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7831 #endif
7832  if(!params.is_null())
7833  esfc_params.set("compute global constants",params->get("compute global constants",true));
7834 
7835  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7836  MyImport, Teuchos::null, rcp(&esfc_params,false));
7837 
7838  }
7839 
7840  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7841  void
7844  const import_type& importer,
7845  const Teuchos::RCP<const map_type>& domainMap,
7846  const Teuchos::RCP<const map_type>& rangeMap,
7847  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7848  {
      // Single-Import convenience wrapper: all of the work is delegated to
      // transferAndFillComplete.  `importer` is passed as the row transfer,
      // and Teuchos::null is passed in the argument position where the
      // two-importer overload passes its domain importer.  destGraph,
      // domainMap, rangeMap and params are forwarded unchanged.
      //
      // NOTE(review): the declarator lines (function name and the
      // declaration of destGraph) are missing from this listing; this is
      // presumably importAndFillComplete -- confirm against the class
      // declaration.
7849  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7850  }
7851 
7852  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7853  void
7856  const import_type& rowImporter,
7857  const import_type& domainImporter,
7858  const Teuchos::RCP<const map_type>& domainMap,
7859  const Teuchos::RCP<const map_type>& rangeMap,
7860  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7861  {
      // Two-Import convenience wrapper: delegates to transferAndFillComplete
      // with `rowImporter` as the row transfer.  `domainImporter` arrives by
      // reference and is wrapped with Teuchos::rcpFromRef so that it can be
      // passed in RCP form.  destGraph, domainMap, rangeMap and params are
      // forwarded unchanged.
      //
      // NOTE(review): the declarator lines (function name and the
      // declaration of destGraph) are missing from this listing; this is
      // presumably importAndFillComplete -- confirm against the class
      // declaration.
7862  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7863  }
7864 
7865  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7866  void
7869  const export_type& exporter,
7870  const Teuchos::RCP<const map_type>& domainMap,
7871  const Teuchos::RCP<const map_type>& rangeMap,
7872  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7873  {
      // Single-Export convenience wrapper: all of the work is delegated to
      // transferAndFillComplete.  `exporter` is passed as the row transfer,
      // and Teuchos::null is passed in the argument position where the
      // two-exporter overload passes its domain exporter.  destGraph,
      // domainMap, rangeMap and params are forwarded unchanged.
      //
      // NOTE(review): the declarator lines (function name and the
      // declaration of destGraph) are missing from this listing; this is
      // presumably exportAndFillComplete -- confirm against the class
      // declaration.
7874  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7875  }
7876 
7877  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7878  void
7881  const export_type& rowExporter,
7882  const export_type& domainExporter,
7883  const Teuchos::RCP<const map_type>& domainMap,
7884  const Teuchos::RCP<const map_type>& rangeMap,
7885  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7886  {
      // Two-Export convenience wrapper: delegates to transferAndFillComplete
      // with `rowExporter` as the row transfer.  `domainExporter` arrives by
      // reference and is wrapped with Teuchos::rcpFromRef so that it can be
      // passed in RCP form.  destGraph, domainMap, rangeMap and params are
      // forwarded unchanged.
      //
      // NOTE(review): the declarator lines (function name and the
      // declaration of destGraph) are missing from this listing; this is
      // presumably exportAndFillComplete -- confirm against the class
      // declaration.
7887  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7888  }
7889 
7890 
7891  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7892  void
7895  {
      // Exchanges the data members listed below between *this and `graph`,
      // one std::swap call per member.
      //
      // NOTE(review): the declarator lines (function name and the
      // declaration of `graph`) are missing from this listing; this is
      // presumably CrsGraph::swap.  Only the members named here are
      // exchanged -- verify the list against the class definition whenever
      // a data member is added.

      // Row, column, range and domain map members.
7896  std::swap(graph.rowMap_, this->rowMap_);
7897  std::swap(graph.colMap_, this->colMap_);
7898  std::swap(graph.rangeMap_, this->rangeMap_);
7899  std::swap(graph.domainMap_, this->domainMap_);
7900 
      // Importer / exporter members.
7901  std::swap(graph.importer_, this->importer_);
7902  std::swap(graph.exporter_, this->exporter_);
7903 
7904  std::swap(graph.lclGraph_, this->lclGraph_);
7905 
      // Per-process ("node") counters.
7906  std::swap(graph.nodeNumDiags_, this->nodeNumDiags_);
7907  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7908 
      // Global counters.
7909  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7910  std::swap(graph.globalNumDiags_, this->globalNumDiags_);
7911  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7912 
7913  std::swap(graph.pftype_, this->pftype_);
7914 
7915  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7916 
7917  std::swap(graph.k_rowPtrs_, this->k_rowPtrs_);
7918 
      // 1-D index storage members.
7919  std::swap(graph.k_lclInds1D_, this->k_lclInds1D_);
7920  std::swap(graph.k_gblInds1D_, this->k_gblInds1D_);
7921 
      // 2-D index storage members.
7922  std::swap(graph.lclInds2D_, this->lclInds2D_);
7923  std::swap(graph.gblInds2D_, this->gblInds2D_);
7924 
7925  std::swap(graph.storageStatus_, this->storageStatus_);
7926 
      // State flags.
7927  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7928  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7929  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7930  std::swap(graph.fillComplete_, this->fillComplete_);
7931  std::swap(graph.lowerTriangular_, this->lowerTriangular_);
7932  std::swap(graph.upperTriangular_, this->upperTriangular_);
7933  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7934  std::swap(graph.noRedundancies_, this->noRedundancies_);
7935  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7936  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7937 
7938  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7939 
7940  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7941  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7942  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7943  }
7944 
7945 
7946  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7947  bool
7950  {
7951  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7952  bool output = true;
7953  output = m1.size() == m2.size() ? output : false;
7954  for(auto & it_m: m1)
7955  {
7956  size_t key = it_m.first;
7957  output = m2.find(key) != m2.end() ? output : false;
7958  if(output)
7959  {
7960  auto v1 = m1.find(key)->second;
7961  auto v2 = m2.find(key)->second;
7962  std::sort(v1.begin(), v1.end());
7963  std::sort(v2.begin(), v2.end());
7964 
7965  output = v1.size() == v2.size() ? output : false;
7966  for(size_t i=0; output && i<v1.size(); i++)
7967  {
7968  output = v1[i]==v2[i] ? output : false;
7969  }
7970  }
7971  }
7972  return output;
7973  };
7974 
7975  bool output = true;
7976 
7977  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7978  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7979  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7980  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7981 
7982  output = this->nodeNumDiags_ == graph.nodeNumDiags_ ? output : false;
7983  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7984 
7985  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7986  output = this->globalNumDiags_ == graph.globalNumDiags_ ? output : false;
7987  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7988 
7989  output = this->pftype_ == graph.pftype_ ? output : false; // ProfileType is a enum (scalar)
7990 
7991  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7992 
7993  output = this->lclInds2D_ == graph.lclInds2D_ ? output : false; // Teuchos::Array has == overloaded
7994  output = this->gblInds2D_ == graph.gblInds2D_ ? output : false; // Teuchos::Array has == overloaded
7995 
7996  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7997 
7998  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7999  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
8000  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
8001  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
8002  output = this->lowerTriangular_ == graph.lowerTriangular_ ? output : false;
8003  output = this->upperTriangular_ == graph.upperTriangular_ ? output : false;
8004  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
8005  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
8006  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
8007  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
8008  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
8009 
8010  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
8011  // nonlocals_ isa std::map<GO, std::vector<GO> >
8012  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
8013 
8014  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
8015  // - since this is a HostMirror type, it should be in host memory already
8016  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
8017  if(output && this->k_numAllocPerRow_.extent(0) > 0)
8018  {
8019  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
8020  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
8021  }
8022 
8023  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
8024  // - since this is a HostMirror type, it should be in host memory already
8025  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
8026  if(output && this->k_numRowEntries_.extent(0) > 0)
8027  {
8028  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
8029  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
8030  }
8031 
8032  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
8033  output = this->k_rowPtrs_.extent(0) == graph.k_rowPtrs_.extent(0) ? output : false;
8034  if(output && this->k_rowPtrs_.extent(0) > 0)
8035  {
8036  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_this = Kokkos::create_mirror_view(this->k_rowPtrs_);
8037  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrs_);
8038  Kokkos::deep_copy(k_rowPtrs_host_this, this->k_rowPtrs_);
8039  Kokkos::deep_copy(k_rowPtrs_host_graph, graph.k_rowPtrs_);
8040  for(size_t i=0; output && i<k_rowPtrs_host_this.extent(0); i++)
8041  output = k_rowPtrs_host_this(i) == k_rowPtrs_host_graph(i) ? output : false;
8042  }
8043 
8044  // Compare k_lclInds1D_ isa Kokkos::View<LocalOrdinal*, ...>
8045  output = this->k_lclInds1D_.extent(0) == graph.k_lclInds1D_.extent(0) ? output : false;
8046  if(output && this->k_lclInds1D_.extent(0) > 0)
8047  {
8048  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_this = Kokkos::create_mirror_view(this->k_lclInds1D_);
8049  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_graph= Kokkos::create_mirror_view(graph.k_lclInds1D_);
8050  Kokkos::deep_copy(k_lclInds1D_host_this, this->k_lclInds1D_);
8051  Kokkos::deep_copy(k_lclInds1D_host_graph, graph.k_lclInds1D_);
8052  for(size_t i=0; output && i < k_lclInds1D_host_this.extent(0); i++)
8053  output = k_lclInds1D_host_this(i) == k_lclInds1D_host_graph(i) ? output : false;
8054  }
8055 
8056  // Compare k_gblInds1D_ isa Kokkos::View<GlobalOrdinal*, ...>
8057  output = this->k_gblInds1D_.extent(0) == graph.k_gblInds1D_.extent(0) ? output : false;
8058  if(output && this->k_gblInds1D_.extent(0) > 0)
8059  {
8060  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_this = Kokkos::create_mirror_view(this->k_gblInds1D_);
8061  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_graph = Kokkos::create_mirror_view(graph.k_gblInds1D_);
8062  Kokkos::deep_copy(k_gblInds1D_host_this, this->k_gblInds1D_);
8063  Kokkos::deep_copy(k_gblInds1D_host_graph, graph.k_gblInds1D_);
8064  for(size_t i=0; output && i<k_gblInds1D_host_this.extent(0); i++)
8065  output = k_gblInds1D_host_this(i) == k_gblInds1D_host_graph(i) ? output : false;
8066  }
8067 
8068  // Check lclGraph_ // isa Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
8069  // Kokkos::StaticCrsGraph has 3 data members in it:
8070  // Kokkos::View<size_type*, ...> row_map (local_graph_type::row_map_type)
8071  // Kokkos::View<data_type*, ...> entries (local_graph_type::entries_type)
8072  // Kokkos::View<size_type*, ...> row_block_offsets (local_graph_type::row_block_type)
8073  // There is currently no Kokkos::StaticCrsGraph comparison function that's built-in, so we will just compare
8074  // the three data items here. This can be replaced if Kokkos ever puts in its own comparison routine.
8075  output = this->lclGraph_.row_map.extent(0) == graph.lclGraph_.row_map.extent(0) ? output : false;
8076  if(output && this->lclGraph_.row_map.extent(0) > 0)
8077  {
8078  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_map);
8079  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_map);
8080  Kokkos::deep_copy(lclGraph_rowmap_host_this, this->lclGraph_.row_map);
8081  Kokkos::deep_copy(lclGraph_rowmap_host_graph, graph.lclGraph_.row_map);
8082  for(size_t i=0; output && i<lclGraph_rowmap_host_this.extent(0); i++)
8083  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i) ? output : false;
8084  }
8085 
8086  output = this->lclGraph_.entries.extent(0) == graph.lclGraph_.entries.extent(0) ? output : false;
8087  if(output && this->lclGraph_.entries.extent(0) > 0)
8088  {
8089  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_this = Kokkos::create_mirror_view(this->lclGraph_.entries);
8090  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.entries);
8091  Kokkos::deep_copy(lclGraph_entries_host_this, this->lclGraph_.entries);
8092  Kokkos::deep_copy(lclGraph_entries_host_graph, graph.lclGraph_.entries);
8093  for(size_t i=0; output && i<lclGraph_entries_host_this.extent(0); i++)
8094  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i) ? output : false;
8095  }
8096 
8097  output = this->lclGraph_.row_block_offsets.extent(0) == graph.lclGraph_.row_block_offsets.extent(0) ? output : false;
8098  if(output && this->lclGraph_.row_block_offsets.extent(0) > 0)
8099  {
8100  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_block_offsets);
8101  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_block_offsets);
8102  Kokkos::deep_copy(lclGraph_rbo_host_this, this->lclGraph_.row_block_offsets);
8103  Kokkos::deep_copy(lclGraph_rbo_host_graph, graph.lclGraph_.row_block_offsets);
8104  for(size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
8105  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i) ? output : false;
8106  }
8107 
8108  // For the Importer and Exporter, we shouldn't need to explicitly check them since
8109  // they will be consistent with the maps.
8110  // Note: importer_ isa Teuchos::RCP<const import_type>
8111  // exporter_ isa Teuchos::RCP<const export_type>
8112 
8113  return output;
8114  }
8115 
8116 
8117 
8118 } // namespace Tpetra
8119 
8120 //
8121 // Explicit instantiation macros
8122 //
8123 // Must be expanded from within the Tpetra namespace!
8124 //
8125 
// Expands to a `template<>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>: the overload that takes the source graph, a
// single Import (importer), the domain Map, the range Map, and a parameter
// list, and returns a Teuchos::RCP to the resulting CrsGraph.  The expansion
// already ends in a semicolon.
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration, not of an explicit instantiation (`template` with no `<>`);
// confirm that this is what is intended.
8126 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8127  template<> \
8128  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8129  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8130  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8131  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8132  CrsGraph<LO,GO,NODE>::node_type>& importer, \
8133  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8134  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8135  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8136  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8137  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8138  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8139  const Teuchos::RCP<Teuchos::ParameterList>& params);
8140 
// Expands to a `template<>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>: the overload that takes the source graph, two
// Import objects (rowImporter and domainImporter), the domain Map, the range
// Map, and a parameter list, and returns a Teuchos::RCP to the resulting
// CrsGraph.  The expansion already ends in a semicolon.
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration, not of an explicit instantiation (`template` with no `<>`);
// confirm that this is what is intended.
8141 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8142  template<> \
8143  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8144  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8145  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8146  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8147  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
8148  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8149  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8150  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
8151  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8152  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8153  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8154  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8155  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8156  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8157  const Teuchos::RCP<Teuchos::ParameterList>& params);
8158 
8159 
// Expands to a `template<>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>: the overload that takes the source graph, a
// single Export (exporter), the domain Map, the range Map, and a parameter
// list, and returns a Teuchos::RCP to the resulting CrsGraph.  The expansion
// already ends in a semicolon.
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration, not of an explicit instantiation (`template` with no `<>`);
// confirm that this is what is intended.
8160 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8161  template<> \
8162  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8163  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8164  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8165  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8166  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
8167  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8168  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8169  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8170  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8171  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8172  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8173  const Teuchos::RCP<Teuchos::ParameterList>& params);
8174 
// Expands to a `template<>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO,GO,NODE>: the overload that takes the source graph, two
// Export objects (rowExporter and domainExporter), the domain Map, the range
// Map, and a parameter list, and returns a Teuchos::RCP to the resulting
// CrsGraph.  The expansion already ends in a semicolon.
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration, not of an explicit instantiation (`template` with no `<>`);
// confirm that this is what is intended.
8175 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8176  template<> \
8177  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
8178  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
8179  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8180  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8181  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
8182  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8183  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8184  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
8185  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8186  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8187  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
8188  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
8189  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
8190  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
8191  const Teuchos::RCP<Teuchos::ParameterList>& params);
8192 
8193 
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the class CrsGraph<LO, GO, NODE>, then expands the four
// import/export-and-fill-complete macros for the same template arguments.
// The expansion does not end in a semicolon of its own; each expanded
// declaration carries its own terminator.
8194 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
8195  template class CrsGraph<LO, GO, NODE>; \
8196  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8197  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
8198  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
8199  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
8200 
8201 
8202 #endif // TPETRA_CRSGRAPH_DEF_HPP
Teuchos::ArrayRCP< Teuchos::Array< local_ordinal_type > > lclInds2D_
Local column indices for all rows.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
void setAllIndices(const typename local_graph_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
bool lowerTriangular_
Whether the graph is locally lower triangular.
size_t nodeNumDiags_
Local number of (populated) diagonal entries.
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
bool haveGlobalConstants_
Whether all processes have computed global constants.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Kokkos::View< global_ordinal_type *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether the data members of this graph are identical to those of the given graph.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
global_size_t globalNumDiags_
Global number of (populated) diagonal entries.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
void getLocalRowView(const local_ordinal_type lclRow, Teuchos::ArrayView< const local_ordinal_type > &lclColInds) const override
Get a const, non-persisting view of the given local row's local column indices, as a Teuchos::ArrayVi...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool upperTriangular_
Whether the graph is locally upper triangular.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
static bool debug()
Whether Tpetra is in debug mode.
size_t findLocalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const local_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a global row index.
std::pair< size_t, std::string > makeIndicesLocal()
Convert column indices from global to local.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
bool hasColMap() const override
Whether the graph has a column Map.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool haveLocalConstants_
Whether this process has computed local constants.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
::Tpetra::Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
std::string description() const override
Return a one-line human-readable description of this object.
local_ordinal_type getLocalViewRawConst(const local_ordinal_type *&lclInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the local column indices of a locally owned row, using the result of getRowInfo...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
Node node_type
This class' Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
Insert new values that don't currently exist.
void getGlobalRowCopy(global_ordinal_type gblRow, const Teuchos::ArrayView< global_ordinal_type > &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph's global column indices into local column indices.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::ArrayView< const global_ordinal_type > getGlobalView(const RowInfo &rowinfo) const
Get a const, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = getR...
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
void setLocallyModified()
Report that we made a local modification to its structure.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
Teuchos::ArrayView< local_ordinal_type > getLocalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = ge...
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
ProfileType getProfileType() const
Returns the profile type (static or dynamic) with which the graph was allocated.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
Sets up and executes a communication plan for a Tpetra DistObject.
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const size_t *, execution_space >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
LocalTriangularStructureResult< typename LocalMapType::local_ordinal_type > determineLocalTriangularStructure(const LocalGraphType &G, const LocalMapType &rowMap, const LocalMapType &colMap, const bool ignoreMapsForTriangularStructure)
Count the local number of diagonal entries in a local sparse graph, and determine whether the local p...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Declaration and definition of Tpetra::Details::determineLocalTriangularStructure. ...
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process, using a local row index.
void computeLocalConstants(const bool computeLocalTriangularConstants)
Compute local constants, if they have not yet been computed.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void globalAssemble()
Communicate nonlocal contributions to other processes.
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
typename device_type::execution_space execution_space
This class' Kokkos execution space.
Teuchos::ArrayRCP< Teuchos::Array< global_ordinal_type > > gblInds2D_
Global column indices for all rows.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
Utility functions for packing and unpacking sparse matrix entries.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Abstract base class for objects that can be the source of an Import or Export operation.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
size_t getNodeAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process...
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
Teuchos::ArrayView< global_ordinal_type > getGlobalViewNonConst(const RowInfo &rowinfo)
Get a nonconst, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = g...
ProfileType pftype_
Whether the graph was allocated with static or dynamic profile.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
void getGlobalRowView(const global_ordinal_type gblRow, Teuchos::ArrayView< const global_ordinal_type > &gblColInds) const override
Get a const, non-persisting view of the given global row's global column indices, as a Teuchos::Array...
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does). ...
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void computeGlobalConstants(const bool computeLocalTriangularConstants)
Compute global constants, if they have not yet been computed.
Teuchos::ArrayView< const local_ordinal_type > getLocalView(const RowInfo &rowinfo) const
Get a const, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = getRo...
A parallel distribution of indices over processes.
local_ordinal_type getGlobalViewRawConst(const global_ordinal_type *&gblInds, local_ordinal_type &capacity, const RowInfo &rowInfo) const
Get a pointer to the global column indices of a locally owned row, using the result of getRowInfoFrom...
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
typename Node::device_type device_type
This class's Kokkos device type.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Teuchos::ArrayRCP< const local_ordinal_type > getNodePackedIndices() const
Get a Teuchos::ArrayRCP of the packed column indices.
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Threshold, below which arrays, lists, etc. will be printed in debug mode.
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for the "new" DistObject interface.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const override
Pack this object's data for Import or Export.
void getLocalRowCopy(local_ordinal_type lclRow, const Teuchos::ArrayView< local_ordinal_type > &lclColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.