Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_RowMatrixTransposer_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
43 #define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
44 
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_Export.hpp"
49 #include "Teuchos_ParameterList.hpp"
50 #include "Teuchos_TimeMonitor.hpp"
51 
52 namespace Tpetra {
53 
54 template<class Scalar,
55  class LocalOrdinal,
56  class GlobalOrdinal,
57  class Node>
59 RowMatrixTransposer (const Teuchos::RCP<const crs_matrix_type>& origMatrix,
60  const std::string& label)
61  : origMatrix_ (origMatrix), label_ (label)
62 {}
63 
64 template<class Scalar,
65  class LocalOrdinal,
66  class GlobalOrdinal,
67  class Node>
68 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
70 createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
71 {
72  using Teuchos::RCP;
73  // Do the local transpose
74  RCP<crs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);
75 
76 #ifdef HAVE_TPETRA_MMM_TIMINGS
77  const std::string prefix = std::string ("Tpetra ") + label_ + ": ";
78  using Teuchos::TimeMonitor;
79  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose TAFC"));
80 #endif
81 
82  // If transMatrixWithSharedRows has an exporter, that's what we
83  // want. If it doesn't, the rows aren't actually shared, and we're
84  // done!
86  RCP<const export_type> exporter =
87  transMatrixWithSharedRows->getGraph ()->getExporter ();
88  if (exporter.is_null ()) {
89  return transMatrixWithSharedRows;
90  }
91  else {
92  Teuchos::ParameterList labelList;
93 #ifdef HAVE_TPETRA_MMM_TIMINGS
94  labelList.set("Timer Label", label_);
95 #endif
96  if(! params.is_null ()) {
97  const char paramName[] = "compute global constants";
98  labelList.set (paramName, params->get (paramName, true));
99  }
100  // Use the Export object to do a fused Export and fillComplete.
101  // This always sorts the local matrix after communication, so
102  // no need to set "sorted = false" in parameters.
103  return exportAndFillCompleteCrsMatrix<crs_matrix_type>
104  (transMatrixWithSharedRows, *exporter, Teuchos::null,
105  Teuchos::null, Teuchos::rcpFromRef (labelList));
106  }
107 }
108 
109 
110 // mfh 03 Feb 2013: In a definition outside the class like this, the
111 // return value is considered outside the class scope (for things like
112 // resolving typedefs), but the arguments are considered inside the
113 // class scope.
114 template<class Scalar,
115  class LocalOrdinal,
116  class GlobalOrdinal,
117  class Node>
118 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
120 createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList>& params)
121 {
122  using Teuchos::Array;
123  using Teuchos::ArrayRCP;
124  using Teuchos::ArrayView;
125  using Teuchos::RCP;
126  using Teuchos::rcp;
127  using Teuchos::rcp_dynamic_cast;
128  using LO = LocalOrdinal;
129  using GO = GlobalOrdinal;
131  using import_type = Tpetra::Import<LO, GO, Node>;
132  using export_type = Tpetra::Export<LO, GO, Node>;
133 
134 #ifdef HAVE_TPETRA_MMM_TIMINGS
135  std::string prefix = std::string("Tpetra ") + label_ + ": ";
136  using Teuchos::TimeMonitor;
137  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose Local"));
138 #endif
139 
140  const bool sort = [&] () {
141  constexpr bool sortDefault = true; // see #4607 discussion
142  const char sortParamName[] = "sort";
143  return params.get () == nullptr ? sortDefault :
144  params->get (sortParamName, sortDefault);
145  } ();
146 
147  const LO lclNumCols (origMatrix_->getNodeNumCols ());
148  const LO lclNumRows (origMatrix_->getNodeNumRows ());
149  const size_t nnz (origMatrix_->getNodeNumEntries ());
150 
151  RCP<const crs_matrix_type> crsMatrix =
152  rcp_dynamic_cast<const crs_matrix_type> (origMatrix_);
153  if (crsMatrix.is_null ()) {
154  auto rowMap = origMatrix_->getRowMap ();
155  if (rowMap->isOneToOne ()) {
156  Teuchos::Array<size_t> numEntPerRow (lclNumRows);
157  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
158  numEntPerRow[lclRow] = origMatrix_->getNumEntriesInLocalRow (lclRow);
159  }
160  auto colMap = origMatrix_->getColMap ();
161 
162  RCP<crs_matrix_type> crsMatrix_nc =
163  rcp (new crs_matrix_type (rowMap, colMap, numEntPerRow ()));
164 
165  // When source & target Maps are same, Import just copies.
166  import_type imp (rowMap, rowMap);
167  crsMatrix_nc->doImport (*origMatrix_, imp, Tpetra::REPLACE);
168  crsMatrix_nc->fillComplete (origMatrix_->getDomainMap (),
169  origMatrix_->getRangeMap ());
170  crsMatrix = crsMatrix_nc;
171  }
172  else {
173  TEUCHOS_ASSERT( false ); // not implemented (it wasn't before)
174  }
175  }
176 
177  using local_matrix_type = typename crs_matrix_type::local_matrix_type;
178  using local_graph_type = typename crs_matrix_type::local_graph_type;
179  using offset_type = typename local_graph_type::size_type;
180  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
181  using index_type = typename local_matrix_type::index_type::non_const_type;
182  using values_type = typename local_matrix_type::values_type::non_const_type;
183  using execution_space = typename local_matrix_type::execution_space;
184 
185  local_matrix_type lclMatrix = crsMatrix->getLocalMatrix ();
186  local_graph_type lclGraph = lclMatrix.graph;
187 
188  // Determine how many nonzeros there are per row in the transpose.
189  using DT = typename crs_matrix_type::device_type;
190  Kokkos::View<LO*, DT> t_counts ("transpose row counts", lclNumCols);
191  using range_type = Kokkos::RangePolicy<LO, execution_space>;
192  Kokkos::parallel_for
193  ("Compute row counts of local transpose",
194  range_type (0, lclNumRows),
195  KOKKOS_LAMBDA (const LO row) {
196  auto rowView = lclGraph.rowConst(row);
197  const LO length = rowView.length;
198 
199  for (LO colID = 0; colID < length; ++colID) {
200  const LO col = rowView(colID);
201  Kokkos::atomic_fetch_add (&t_counts[col], LO (1));
202  }
203  });
204 
205  using Kokkos::view_alloc;
206  using Kokkos::WithoutInitializing;
207  row_map_type t_offsets
208  (view_alloc ("transpose ptr", WithoutInitializing), lclNumCols + 1);
209 
210  // TODO (mfh 10 Jul 2019): This returns the sum of all counts,
211  // which could be useful for checking nnz.
212  (void) Details::computeOffsetsFromCounts (t_offsets, t_counts);
213 
214  index_type t_cols
215  (view_alloc ("transpose lcl ind", WithoutInitializing), nnz);
216  values_type t_vals
217  (view_alloc ("transpose val", WithoutInitializing), nnz);
218  Kokkos::parallel_for
219  ("Compute local transpose",
220  range_type (0, lclNumRows),
221  KOKKOS_LAMBDA (const LO row) {
222  auto rowView = lclMatrix.rowConst(row);
223  const LO length = rowView.length;
224 
225  for (LO colID = 0; colID < length; colID++) {
226  const LO col = rowView.colidx(colID);
227  const offset_type beg = t_offsets[col];
228  const LO old_count =
229  Kokkos::atomic_fetch_sub (&t_counts[col], LO (1));
230  const LO len (t_offsets[col+1] - beg);
231  const offset_type insert_pos = beg + (len - old_count);
232  t_cols[insert_pos] = row;
233  t_vals[insert_pos] = rowView.value(colID);
234  }
235  });
236 
237  // Invariant: At this point, all entries of t_counts are zero.
238  // This means we can use it to store new post-merge counts.
239 
240  if (sort) {
241  // NOTE (mfh 11 Jul 2019) Merging is unnecessary: above
242  // parallel_for visits each row of the original matrix once, so
243  // there can be no duplicate column indices in the transpose.
245  Kokkos::parallel_for
246  ("Sort rows of local transpose",
247  range_type (0, lclNumCols),
248  KOKKOS_LAMBDA (const LO lclCol) {
249  const offset_type beg = t_offsets[lclCol];
250  const LO len (t_offsets[lclCol+1] - t_offsets[lclCol]);
251 
252  LO* cols_beg = t_cols.data () + beg;
253  IST* vals_beg = t_vals.data () + beg;
254  shellSortKeysAndValues (cols_beg, vals_beg, len);
255  });
256  }
257 
258  local_matrix_type lclTransposeMatrix ("transpose", lclNumCols,
259  lclNumRows, nnz,
260  t_vals, t_offsets, t_cols);
261 
262  // Prebuild the importers and exporters the no-communication way,
263  // flipping the importers and exporters around.
264  const auto origExport = origMatrix_->getGraph ()->getExporter ();
265  RCP<const import_type> myImport = origExport.is_null () ?
266  Teuchos::null : rcp (new import_type (*origExport));
267  const auto origImport = origMatrix_->getGraph ()->getImporter ();
268  RCP<const export_type> myExport = origImport.is_null () ?
269  Teuchos::null : rcp (new export_type (*origImport));
270 
271  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
272  if(!sort) {
273  graphParams = rcp(new Teuchos::ParameterList);
274  graphParams->set("sorted", false);
275  }
276 
277  return rcp (new crs_matrix_type (lclTransposeMatrix,
278  origMatrix_->getColMap (),
279  origMatrix_->getRowMap (),
280  origMatrix_->getRangeMap (),
281  origMatrix_->getDomainMap (),
282  myImport, myExport, graphParams));
283 }
284 //
285 // Explicit instantiation macro
286 //
287 // Must be expanded from within the Tpetra namespace!
288 //
289 
290 #define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
291  template class RowMatrixTransposer< SCALAR, LO , GO , NODE >;
292 
293 } // namespace Tpetra
294 
295 #endif
RowMatrixTransposer(const Teuchos::RCP< const crs_matrix_type > &origMatrix, const std::string &label=std::string())
Constructor that takes the matrix to transpose.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
typename crs_graph_type::local_graph_type local_graph_type
The part of the sparse matrix&#39;s graph on each MPI process.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
typename Node::device_type device_type
The Kokkos device type.
Teuchos::RCP< crs_matrix_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
Replace existing values with new values.
Teuchos::RCP< crs_matrix_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.