Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
45 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
46 
54 
56 #include "Tpetra_RowGraph.hpp"
57 #include "Tpetra_CrsGraph.hpp"
58 #include "Tpetra_RowMatrix.hpp"
59 #include "Tpetra_Vector.hpp"
60 
61 namespace Tpetra {
62 namespace Details {
63 
64 // Work-around for #499: Implementation of one-argument (no offsets)
65 // getLocalDiagCopy for the NOT fill-complete case.
66 //
67 // NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
68 // functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
69 // KOKKOS_FUNCTION here, because those attempt to mark the functions
70 // they modify as CUDA device functions. This functor is ONLY for
71 // non-CUDA execution spaces!
72 template<class SC, class LO, class GO, class NT>
73 class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
74 public:
75  typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
76  typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
77 
78  typedef typename vec_type::impl_scalar_type IST;
79  // The output Vector determines the execution space.
80  typedef typename vec_type::device_type device_type;
81 
82 private:
83  typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
84  typedef typename vec_type::map_type map_type;
85 
86  static bool
87  graphIsSorted (const row_matrix_type& A)
88  {
89  using Teuchos::RCP;
90  using Teuchos::rcp_dynamic_cast;
91  typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
92  typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
93 
94  // We conservatively assume not sorted. RowGraph lacks an
95  // "isSorted" predicate, so we can't know for sure unless the cast
96  // to CrsGraph succeeds.
97  bool sorted = false;
98 
99  RCP<const row_graph_type> G_row = A.getGraph ();
100  if (! G_row.is_null ()) {
101  RCP<const crs_graph_type> G_crs =
102  rcp_dynamic_cast<const crs_graph_type> (G_row);
103  if (! G_crs.is_null ()) {
104  sorted = G_crs->isSorted ();
105  }
106  }
107 
108  return sorted;
109  }
110 
111 public:
112  // lclNumErrs [out] Total count of errors on this process.
113  GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
114  vec_type& diag,
115  const row_matrix_type& A) :
116  A_ (A),
117  lclRowMap_ (A.getRowMap ()->getLocalMap ()),
118  lclColMap_ (A.getColMap ()->getLocalMap ()),
119  sorted_ (graphIsSorted (A))
120  {
121  const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
122  {
123  const LO matLclNumRows =
124  static_cast<LO> (lclRowMap_.getNodeNumElements ());
125  TEUCHOS_TEST_FOR_EXCEPTION
126  (lclNumRows != matLclNumRows, std::invalid_argument,
127  "diag.getLocalLength() = " << lclNumRows << " != "
128  "A.getRowMap()->getNodeNumElements() = " << matLclNumRows << ".");
129  }
130 
131  // Side effects start below this point.
132 
133  diag.modify_host ();
134  D_lcl_ = diag.getLocalViewHost ();
135  D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);
136 
137  Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
138  lclNumErrs = 0;
139  Kokkos::parallel_reduce (range, *this, lclNumErrs);
140 
141  // sync changes back to device, since the user doesn't know that
142  // we had to run on host.
143  diag.template sync<typename device_type::memory_space> ();
144  }
145 
146  void operator () (const LO& lclRowInd, LO& errCount) const {
147  using KokkosSparse::findRelOffset;
148 
149  D_lcl_1d_(lclRowInd) = Kokkos::Details::ArithTraits<IST>::zero ();
150  const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
151  const LO lclColInd = lclColMap_.getLocalElement (gblInd);
152 
153  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
154  errCount++;
155  }
156  else { // row index is also in the column Map on this process
157  LO numEnt;
158  const LO* lclColInds;
159  const SC* curVals;
160  const LO err = A_.getLocalRowViewRaw (lclRowInd, numEnt, lclColInds, curVals);
161  if (err != 0) {
162  errCount++;
163  }
164  else {
165  // The search hint is always zero, since we only call this
166  // once per row of the matrix.
167  const LO hint = 0;
168  const LO offset =
169  findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
170  if (offset == numEnt) { // didn't find the diagonal column index
171  errCount++;
172  }
173  else {
174  D_lcl_1d_(lclRowInd) = curVals[offset];
175  }
176  }
177  }
178  }
179 
180 private:
181  const row_matrix_type& A_;
182  typename map_type::local_map_type lclRowMap_;
183  typename map_type::local_map_type lclColMap_;
184  typename vec_type::dual_view_type::t_host D_lcl_;
185  decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
186  const bool sorted_;
187 };
188 
189 
190 template<class SC, class LO, class GO, class NT>
191 LO
192 getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>& diag,
193  const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
194  const bool debug)
195 {
196  using ::Tpetra::Details::gathervPrint;
197  using Teuchos::outArg;
198  using Teuchos::REDUCE_MIN;
199  using Teuchos::reduceAll;
200  typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
201  LO, GO, NT> functor_type;
202 
203  // The functor's constructor does error checking and executes the
204  // thread-parallel kernel.
205 
206  LO lclNumErrs = 0;
207 
208  if (debug) {
209  int lclSuccess = 1;
210  int gblSuccess = 0;
211  std::ostringstream errStrm;
212  Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
213  if (commPtr.is_null ()) {
214  return lclNumErrs; // this process does not participate
215  }
216  const Teuchos::Comm<int>& comm = *commPtr;
217 
218  try {
219  functor_type functor (lclNumErrs, diag, A);
220  }
221  catch (std::exception& e) {
222  lclSuccess = -1;
223  errStrm << "Process " << A.getComm ()->getRank () << ": "
224  << e.what () << std::endl;
225  }
226  if (lclNumErrs != 0) {
227  lclSuccess = 0;
228  }
229 
230  reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
231  if (gblSuccess == -1) {
232  if (comm.getRank () == 0) {
233  // We gather into std::cerr, rather than using an
234  // std::ostringstream, because there might be a lot of MPI
235  // processes. It could take too much memory to gather all the
236  // messages to Process 0 before printing. gathervPrint gathers
237  // and prints one message at a time, thus saving memory. I
238  // don't want to run out of memory while trying to print an
239  // error message; that would hide the real problem.
240  std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
241  "exception on one or more MPI processes in the matrix's comunicator."
242  << std::endl;
243  }
244  gathervPrint (std::cerr, errStrm.str (), comm);
245  // Don't need to print anything here, since we've already
246  // printed to std::cerr above.
247  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
248  }
249  else if (gblSuccess == 0) {
250  TEUCHOS_TEST_FOR_EXCEPTION
251  (gblSuccess != 1, std::runtime_error,
252  "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
253  "one or more MPI processes in the matrix's communicator.");
254  }
255  }
256  else { // ! debug
257  functor_type functor (lclNumErrs, diag, A);
258  }
259 
260  return lclNumErrs;
261 }
262 
263 } // namespace Details
264 } // namespace Tpetra
265 
266 // Explicit template instantiation macro for
267 // getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
268 // Must be used inside the Tpetra namespace.
269 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
270  template LO \
271  Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
272  ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
273  const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
274  const bool debug);
275 
276 #endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
An abstract interface for graphs accessed by rows.
Node::device_type device_type
The Kokkos device type.
base_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Declaration of a function that prints strings from each process.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix&#39;s diagonal entries into a Tpetra::V...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
base_type::map_type map_type
The type of the Map specialization used by this class.
::Tpetra::Details::LocalMap< local_ordinal_type, global_ordinal_type, device_type > local_map_type
Type of the &quot;local&quot; Map.