Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
11 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
12 
20 
22 #include "Tpetra_RowGraph.hpp"
23 #include "Tpetra_CrsGraph.hpp"
24 #include "Tpetra_RowMatrix.hpp"
25 #include "Tpetra_Vector.hpp"
26 
27 namespace Tpetra {
28 namespace Details {
29 
30 // Work-around for #499: Implementation of one-argument (no offsets)
31 // getLocalDiagCopy for the NOT fill-complete case.
32 //
33 // NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
34 // functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
35 // KOKKOS_FUNCTION here, because those attempt to mark the functions
36 // they modify as CUDA device functions. This functor is ONLY for
37 // non-CUDA execution spaces!
38 template <class SC, class LO, class GO, class NT>
39 class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
40  public:
41  using row_matrix_type = ::Tpetra::RowMatrix<SC, LO, GO, NT>;
42  using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;
43 
44  using IST = typename vec_type::impl_scalar_type;
45  // The output Vector determines the execution space.
46 
47  using host_execution_space = typename vec_type::dual_view_type::t_host::execution_space;
48 
49  private:
50  using map_type = typename vec_type::map_type;
51 
52  static bool
53  graphIsSorted(const row_matrix_type& A) {
54  using Teuchos::RCP;
55  using Teuchos::rcp_dynamic_cast;
56  using crs_graph_type = Tpetra::CrsGraph<LO, GO, NT>;
57  using row_graph_type = Tpetra::RowGraph<LO, GO, NT>;
58 
59  // We conservatively assume not sorted. RowGraph lacks an
60  // "isSorted" predicate, so we can't know for sure unless the cast
61  // to CrsGraph succeeds.
62  bool sorted = false;
63 
64  RCP<const row_graph_type> G_row = A.getGraph();
65  if (!G_row.is_null()) {
66  RCP<const crs_graph_type> G_crs =
67  rcp_dynamic_cast<const crs_graph_type>(G_row);
68  if (!G_crs.is_null()) {
69  sorted = G_crs->isSorted();
70  }
71  }
72 
73  return sorted;
74  }
75 
76  public:
77  // lclNumErrs [out] Total count of errors on this process.
78  GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor(LO& lclNumErrs,
79  vec_type& diag,
80  const row_matrix_type& A)
81  : A_(A)
82  , lclRowMap_(*A.getRowMap())
83  , lclColMap_(*A.getColMap())
84  , sorted_(graphIsSorted(A)) {
85  const LO lclNumRows = static_cast<LO>(diag.getLocalLength());
86  {
87  const LO matLclNumRows =
88  static_cast<LO>(lclRowMap_.getLocalNumElements());
89  TEUCHOS_TEST_FOR_EXCEPTION(lclNumRows != matLclNumRows, std::invalid_argument,
90  "diag.getLocalLength() = " << lclNumRows << " != "
91  "A.getRowMap()->getLocalNumElements() = "
92  << matLclNumRows << ".");
93  }
94 
95  // Side effects start below this point.
96  D_lcl_ = diag.getLocalViewHost(Access::OverwriteAll);
97  D_lcl_1d_ = Kokkos::subview(D_lcl_, Kokkos::ALL(), 0);
98 
99  Kokkos::RangePolicy<host_execution_space, LO> range(0, lclNumRows);
100  lclNumErrs = 0;
101  Kokkos::parallel_reduce(range, *this, lclNumErrs);
102 
103  // sync changes back to device, since the user doesn't know that
104  // we had to run on host.
105  // diag.template sync<typename device_type::memory_space> ();
106  }
107 
108  void operator()(const LO& lclRowInd, LO& errCount) const {
109  using KokkosSparse::findRelOffset;
110 
111 #if KOKKOS_VERSION >= 40799
112  D_lcl_1d_(lclRowInd) = KokkosKernels::ArithTraits<IST>::zero();
113 #else
114  D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero();
115 #endif
116  const GO gblInd = lclRowMap_.getGlobalElement(lclRowInd);
117  const LO lclColInd = lclColMap_.getLocalElement(gblInd);
118 
119  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
120  errCount++;
121  } else { // row index is also in the column Map on this process
122  typename row_matrix_type::local_inds_host_view_type lclColInds;
123  typename row_matrix_type::values_host_view_type curVals;
124  A_.getLocalRowView(lclRowInd, lclColInds, curVals);
125  LO numEnt = lclColInds.extent(0);
126  // The search hint is always zero, since we only call this
127  // once per row of the matrix.
128  const LO hint = 0;
129  const LO offset =
130  findRelOffset(lclColInds, numEnt, lclColInd, hint, sorted_);
131  if (offset == numEnt) { // didn't find the diagonal column index
132  errCount++;
133  } else {
134  D_lcl_1d_(lclRowInd) = curVals[offset];
135  }
136  }
137  }
138 
139  private:
140  const row_matrix_type& A_;
141  map_type lclRowMap_;
142  map_type lclColMap_;
143  typename vec_type::dual_view_type::t_host D_lcl_;
144  decltype(Kokkos::subview(D_lcl_, Kokkos::ALL(), 0)) D_lcl_1d_;
145  const bool sorted_;
146 };
147 
148 template <class SC, class LO, class GO, class NT>
149 LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector<SC, LO, GO, NT>& diag,
150  const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
151  const bool debug) {
152  using Teuchos::outArg;
153  using Teuchos::REDUCE_MIN;
154  using Teuchos::reduceAll;
155  using ::Tpetra::Details::gathervPrint;
156  using functor_type = GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC, LO, GO, NT>;
157 
158  // The functor's constructor does error checking and executes the
159  // thread-parallel kernel.
160 
161  LO lclNumErrs = 0;
162 
163  if (debug) {
164  int lclSuccess = 1;
165  int gblSuccess = 0;
166  std::ostringstream errStrm;
167  Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm();
168  if (commPtr.is_null()) {
169  return lclNumErrs; // this process does not participate
170  }
171  const Teuchos::Comm<int>& comm = *commPtr;
172 
173  try {
174  functor_type functor(lclNumErrs, diag, A);
175  } catch (std::exception& e) {
176  lclSuccess = -1;
177  errStrm << "Process " << A.getComm()->getRank() << ": "
178  << e.what() << std::endl;
179  }
180  if (lclNumErrs != 0) {
181  lclSuccess = 0;
182  }
183 
184  reduceAll<int, int>(comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
185  if (gblSuccess == -1) {
186  if (comm.getRank() == 0) {
187  // We gather into std::cerr, rather than using an
188  // std::ostringstream, because there might be a lot of MPI
189  // processes. It could take too much memory to gather all the
190  // messages to Process 0 before printing. gathervPrint gathers
191  // and prints one message at a time, thus saving memory. I
192  // don't want to run out of memory while trying to print an
193  // error message; that would hide the real problem.
194  std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
195  "exception on one or more MPI processes in the matrix's comunicator."
196  << std::endl;
197  }
198  gathervPrint(std::cerr, errStrm.str(), comm);
199  // Don't need to print anything here, since we've already
200  // printed to std::cerr above.
201  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
202  } else if (gblSuccess == 0) {
203  TEUCHOS_TEST_FOR_EXCEPTION(gblSuccess != 1, std::runtime_error,
204  "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
205  "one or more MPI processes in the matrix's communicator.");
206  }
207  } else { // ! debug
208  functor_type functor(lclNumErrs, diag, A);
209  }
210 
211  return lclNumErrs;
212 }
213 
214 } // namespace Details
215 } // namespace Tpetra
216 
217 // Explicit template instantiation macro for
218 // getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
219 // Must be used inside the Tpetra namespace.
220 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT(SCALAR, LO, GO, NODE) \
221  template LO \
222  Details::getLocalDiagCopyWithoutOffsetsNotFillComplete<SCALAR, LO, GO, NODE>(::Tpetra::Vector<SCALAR, LO, GO, NODE> & diag, \
223  const ::Tpetra::RowMatrix<SCALAR, LO, GO, NODE>& A, \
224  const bool debug);
225 
226 #endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
An abstract interface for graphs accessed by rows.
base_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Declaration of a function that prints strings from each process.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix&#39;s diagonal entries into a Tpetra::V...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
base_type::map_type map_type
The type of the Map specialization used by this class.