Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP

#include "Tpetra_RowGraph.hpp"
#include "Tpetra_CrsGraph.hpp"
#include "Tpetra_RowMatrix.hpp"
#include "Tpetra_Vector.hpp"

namespace Tpetra {
namespace Details {

// Work-around for #499: Implementation of one-argument (no offsets)
// getLocalDiagCopy for the NOT fill-complete case.
//
// NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
// functions!  Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
// KOKKOS_FUNCTION here, because those attempt to mark the functions
// they modify as CUDA device functions.  This functor is ONLY for
// non-CUDA execution spaces!
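//
// For reference, the host-only reduction pattern this functor relies
// on looks like the following minimal sketch (the names n and check
// are illustrative, not part of Tpetra):
//
//   using host_exec = Kokkos::DefaultHostExecutionSpace;
//   int errs = 0;
//   Kokkos::parallel_reduce (
//     Kokkos::RangePolicy<host_exec, int> (0, n),
//     [=] (const int i, int& e) { if (! check (i)) { ++e; } }, errs);
//
// Because the body runs on host threads only, it may call functions
// that are not marked as CUDA device functions.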
template<class SC, class LO, class GO, class NT>
class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
public:
  using row_matrix_type = ::Tpetra::RowMatrix<SC, LO, GO, NT>;
  using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;

  using IST = typename vec_type::impl_scalar_type;

  // The output Vector determines the execution space.
  using host_execution_space = typename vec_type::dual_view_type::t_host::execution_space;

private:
  using map_type = typename vec_type::map_type;

  static bool
  graphIsSorted (const row_matrix_type& A)
  {
    using Teuchos::RCP;
    using Teuchos::rcp_dynamic_cast;
    using crs_graph_type = Tpetra::CrsGraph<LO, GO, NT>;
    using row_graph_type = Tpetra::RowGraph<LO, GO, NT>;

    // We conservatively assume not sorted.  RowGraph lacks an
    // "isSorted" predicate, so we can't know for sure unless the cast
    // to CrsGraph succeeds.
    bool sorted = false;

    RCP<const row_graph_type> G_row = A.getGraph ();
    if (! G_row.is_null ()) {
      RCP<const crs_graph_type> G_crs =
        rcp_dynamic_cast<const crs_graph_type> (G_row);
      if (! G_crs.is_null ()) {
        sorted = G_crs->isSorted ();
      }
    }

    return sorted;
  }

public:
  // lclNumErrs [out] Total count of errors on this process.
  GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
                                                        vec_type& diag,
                                                        const row_matrix_type& A) :
    A_ (A),
    lclRowMap_ (*A.getRowMap ()),
    lclColMap_ (*A.getColMap ()),
    sorted_ (graphIsSorted (A))
  {
    const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
    {
      const LO matLclNumRows =
        static_cast<LO> (lclRowMap_.getLocalNumElements ());
      TEUCHOS_TEST_FOR_EXCEPTION
        (lclNumRows != matLclNumRows, std::invalid_argument,
         "diag.getLocalLength() = " << lclNumRows << " != "
         "A.getRowMap()->getLocalNumElements() = " << matLclNumRows << ".");
    }

    // Side effects start below this point.
    D_lcl_ = diag.getLocalViewHost(Access::OverwriteAll);
    D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);

    Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
    lclNumErrs = 0;
    Kokkos::parallel_reduce (range, *this, lclNumErrs);

    // We ran on host, but the user doesn't need to know that.
    // Requesting the host view with Access::OverwriteAll marks the
    // host data as modified, so the changes sync back to device on
    // the next device access; no explicit sync call is needed here.
  }

  void operator () (const LO& lclRowInd, LO& errCount) const {
    using KokkosSparse::findRelOffset;

    D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero ();
    const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
    const LO lclColInd = lclColMap_.getLocalElement (gblInd);

    if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
      errCount++;
    }
    else { // row index is also in the column Map on this process
      typename row_matrix_type::local_inds_host_view_type lclColInds;
      typename row_matrix_type::values_host_view_type curVals;
      A_.getLocalRowView (lclRowInd, lclColInds, curVals);
      const LO numEnt = static_cast<LO> (lclColInds.extent (0));
      // The search hint is always zero, since we only call
      // findRelOffset once per row of the matrix.
      const LO hint = 0;
      const LO offset =
        findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
      if (offset == numEnt) { // didn't find the diagonal column index
        errCount++;
      }
      else {
        D_lcl_1d_(lclRowInd) = curVals[offset];
      }
    }
  }

private:
  const row_matrix_type& A_;
  map_type lclRowMap_;
  map_type lclColMap_;
  typename vec_type::dual_view_type::t_host D_lcl_;
  decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
  const bool sorted_;
};
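
// Note: constructing this functor is what runs the kernel; there is
// no separate "execute" call.  A minimal sketch of the intended use
// (the function below does exactly this):
//
//   LO errs = 0;
//   GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC, LO, GO, NT>
//     functor (errs, diag, A); // fills diag; errs counts failed rows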


template<class SC, class LO, class GO, class NT>
LO
getLocalDiagCopyWithoutOffsetsNotFillComplete (::Tpetra::Vector<SC, LO, GO, NT>& diag,
                                               const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
                                               const bool debug)
{
  using ::Tpetra::Details::gathervPrint;
  using Teuchos::outArg;
  using Teuchos::REDUCE_MIN;
  using Teuchos::reduceAll;
  using functor_type = GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC, LO, GO, NT>;

  // The functor's constructor does error checking and executes the
  // thread-parallel kernel.

  LO lclNumErrs = 0;

  if (debug) {
    int lclSuccess = 1;
    int gblSuccess = 0;
    std::ostringstream errStrm;
    Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
    if (commPtr.is_null ()) {
      return lclNumErrs; // this process does not participate
    }
    const Teuchos::Comm<int>& comm = *commPtr;

    try {
      functor_type functor (lclNumErrs, diag, A);
    }
    catch (std::exception& e) {
      lclSuccess = -1;
      errStrm << "Process " << A.getComm ()->getRank () << ": "
              << e.what () << std::endl;
    }
    if (lclNumErrs != 0) {
      lclSuccess = 0;
    }

    reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
    if (gblSuccess == -1) {
      if (comm.getRank () == 0) {
        // We gather into std::cerr, rather than into an
        // std::ostringstream, because there might be a lot of MPI
        // processes.  It could take too much memory to gather all the
        // messages to Process 0 before printing.  gathervPrint gathers
        // and prints one message at a time, thus saving memory.  I
        // don't want to run out of memory while trying to print an
        // error message; that would hide the real problem.
        std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
          "exception on one or more MPI processes in the matrix's communicator."
          << std::endl;
      }
      gathervPrint (std::cerr, errStrm.str (), comm);
      // Don't need to print anything here, since we've already
      // printed to std::cerr above.
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
    }
    else if (gblSuccess == 0) {
      TEUCHOS_TEST_FOR_EXCEPTION
        (gblSuccess != 1, std::runtime_error,
         "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
         "one or more MPI processes in the matrix's communicator.");
    }
  }
  else { // ! debug
    functor_type functor (lclNumErrs, diag, A);
  }

  return lclNumErrs;
}
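
// A minimal usage sketch (hypothetical caller code; assumes A is a
// RowMatrix that is not fill complete but does have row and column
// Maps):
//
//   Tpetra::Vector<SC, LO, GO, NT> diag (A.getRowMap ());
//   const LO lclErrs =
//     getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, A);
//   // lclErrs counts rows on this process whose diagonal entry is
//   // missing from the matrix (or whose row index is not in the
//   // column Map).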

} // namespace Details
} // namespace Tpetra

// Explicit template instantiation macro for
// getLocalDiagCopyWithoutOffsetsNotFillComplete.  NOT FOR USERS!!!
// Must be used inside the Tpetra namespace.
#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
  template LO \
  Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
  ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
    const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
    const bool debug);
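
// For example, an explicit template instantiation (ETI) source file
// might invoke the macro like this (MyNode is just a placeholder for
// whatever Node types the build enables):
//
//   namespace Tpetra {
//     TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( double, int, long long, MyNode )
//   }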

#endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP