Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
11 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
12 
20 
22 #include "Tpetra_RowGraph.hpp"
23 #include "Tpetra_CrsGraph.hpp"
24 #include "Tpetra_RowMatrix.hpp"
25 #include "Tpetra_Vector.hpp"
26 
27 namespace Tpetra {
28 namespace Details {
29 
30 // Work-around for #499: Implementation of one-argument (no offsets)
31 // getLocalDiagCopy for the NOT fill-complete case.
32 //
33 // NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
34 // functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
35 // KOKKOS_FUNCTION here, because those attempt to mark the functions
36 // they modify as CUDA device functions. This functor is ONLY for
37 // non-CUDA execution spaces!
38 template<class SC, class LO, class GO, class NT>
39 class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
40 public:
41  typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
42  typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
43 
44  typedef typename vec_type::impl_scalar_type IST;
45  // The output Vector determines the execution space.
46 
47 private:
48  typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
49  typedef typename vec_type::map_type map_type;
50 
51  static bool
52  graphIsSorted (const row_matrix_type& A)
53  {
54  using Teuchos::RCP;
55  using Teuchos::rcp_dynamic_cast;
56  typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
57  typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
58 
59  // We conservatively assume not sorted. RowGraph lacks an
60  // "isSorted" predicate, so we can't know for sure unless the cast
61  // to CrsGraph succeeds.
62  bool sorted = false;
63 
64  RCP<const row_graph_type> G_row = A.getGraph ();
65  if (! G_row.is_null ()) {
66  RCP<const crs_graph_type> G_crs =
67  rcp_dynamic_cast<const crs_graph_type> (G_row);
68  if (! G_crs.is_null ()) {
69  sorted = G_crs->isSorted ();
70  }
71  }
72 
73  return sorted;
74  }
75 
76 public:
77  // lclNumErrs [out] Total count of errors on this process.
78  GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
79  vec_type& diag,
80  const row_matrix_type& A) :
81  A_ (A),
82  lclRowMap_ (*A.getRowMap ()),
83  lclColMap_ (*A.getColMap ()),
84  sorted_ (graphIsSorted (A))
85  {
86  const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
87  {
88  const LO matLclNumRows =
89  static_cast<LO> (lclRowMap_.getLocalNumElements ());
90  TEUCHOS_TEST_FOR_EXCEPTION
91  (lclNumRows != matLclNumRows, std::invalid_argument,
92  "diag.getLocalLength() = " << lclNumRows << " != "
93  "A.getRowMap()->getLocalNumElements() = " << matLclNumRows << ".");
94  }
95 
96  // Side effects start below this point.
97  D_lcl_ = diag.getLocalViewHost(Access::OverwriteAll);
98  D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);
99 
100  Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
101  lclNumErrs = 0;
102  Kokkos::parallel_reduce (range, *this, lclNumErrs);
103 
104  // sync changes back to device, since the user doesn't know that
105  // we had to run on host.
106  //diag.template sync<typename device_type::memory_space> ();
107  }
108 
109  void operator () (const LO& lclRowInd, LO& errCount) const {
110  using KokkosSparse::findRelOffset;
111 
112  D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero ();
113  const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
114  const LO lclColInd = lclColMap_.getLocalElement (gblInd);
115 
116  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
117  errCount++;
118  }
119  else { // row index is also in the column Map on this process
120  typename row_matrix_type::local_inds_host_view_type lclColInds;
121  typename row_matrix_type::values_host_view_type curVals;
122  A_.getLocalRowView(lclRowInd, lclColInds, curVals);
123  LO numEnt = lclColInds.extent(0);
124  // The search hint is always zero, since we only call this
125  // once per row of the matrix.
126  const LO hint = 0;
127  const LO offset =
128  findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
129  if (offset == numEnt) { // didn't find the diagonal column index
130  errCount++;
131  }
132  else {
133  D_lcl_1d_(lclRowInd) = curVals[offset];
134  }
135  }
136  }
137 
138 private:
139  const row_matrix_type& A_;
140  map_type lclRowMap_;
141  map_type lclColMap_;
142  typename vec_type::dual_view_type::t_host D_lcl_;
143  decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
144  const bool sorted_;
145 };
146 
147 
148 template<class SC, class LO, class GO, class NT>
149 LO
150 getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>& diag,
151  const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
152  const bool debug)
153 {
154  using ::Tpetra::Details::gathervPrint;
155  using Teuchos::outArg;
156  using Teuchos::REDUCE_MIN;
157  using Teuchos::reduceAll;
158  typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
159  LO, GO, NT> functor_type;
160 
161  // The functor's constructor does error checking and executes the
162  // thread-parallel kernel.
163 
164  LO lclNumErrs = 0;
165 
166  if (debug) {
167  int lclSuccess = 1;
168  int gblSuccess = 0;
169  std::ostringstream errStrm;
170  Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
171  if (commPtr.is_null ()) {
172  return lclNumErrs; // this process does not participate
173  }
174  const Teuchos::Comm<int>& comm = *commPtr;
175 
176  try {
177  functor_type functor (lclNumErrs, diag, A);
178  }
179  catch (std::exception& e) {
180  lclSuccess = -1;
181  errStrm << "Process " << A.getComm ()->getRank () << ": "
182  << e.what () << std::endl;
183  }
184  if (lclNumErrs != 0) {
185  lclSuccess = 0;
186  }
187 
188  reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
189  if (gblSuccess == -1) {
190  if (comm.getRank () == 0) {
191  // We gather into std::cerr, rather than using an
192  // std::ostringstream, because there might be a lot of MPI
193  // processes. It could take too much memory to gather all the
194  // messages to Process 0 before printing. gathervPrint gathers
195  // and prints one message at a time, thus saving memory. I
196  // don't want to run out of memory while trying to print an
197  // error message; that would hide the real problem.
198  std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
199  "exception on one or more MPI processes in the matrix's comunicator."
200  << std::endl;
201  }
202  gathervPrint (std::cerr, errStrm.str (), comm);
203  // Don't need to print anything here, since we've already
204  // printed to std::cerr above.
205  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
206  }
207  else if (gblSuccess == 0) {
208  TEUCHOS_TEST_FOR_EXCEPTION
209  (gblSuccess != 1, std::runtime_error,
210  "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
211  "one or more MPI processes in the matrix's communicator.");
212  }
213  }
214  else { // ! debug
215  functor_type functor (lclNumErrs, diag, A);
216  }
217 
218  return lclNumErrs;
219 }
220 
221 } // namespace Details
222 } // namespace Tpetra
223 
224 // Explicit template instantiation macro for
225 // getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
226 // Must be used inside the Tpetra namespace.
227 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
228  template LO \
229  Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
230  ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
231  const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
232  const bool debug);
233 
234 #endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
An abstract interface for graphs accessed by rows.
base_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Declaration of a function that prints strings from each process.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix&#39;s diagonal entries into a Tpetra::V...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
base_type::map_type map_type
The type of the Map specialization used by this class.