42 #ifndef TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
43 #define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_Export.hpp"
49 #include "Teuchos_ParameterList.hpp"
50 #include "Teuchos_TimeMonitor.hpp"
// Constructor fragment. The visible member-initializer list stores the
// `origMatrix` argument in origMatrix_ and the `label` argument in label_.
// NOTE(review): the remaining template parameters, the first constructor
// parameter, and the constructor body are not visible in this excerpt.
54 template<
class Scalar,
60 const std::string& label)
61 : origMatrix_ (origMatrix), label_ (label)
// Transpose entry point returning an RCP to a CrsMatrix (presumably
// createTranspose; the line carrying the function name is not visible here).
//
// Visible flow:
//   1. Build a transpose via createTransposeLocal (params).
//   2. Fetch the Export object from that matrix's graph.
//   3. If there is no Export object, return the result of step 1 unchanged.
//   4. Otherwise redistribute it with exportAndFillCompleteCrsMatrix and
//      return that result.
//
// `params` is forwarded to createTransposeLocal; additionally, its
// "compute global constants" entry is copied into the parameter list handed
// to the export step.
64 template<
class Scalar,
68 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
// The variable name suggests this intermediate result may still have rows
// shared among processes — NOTE(review): confirm against the class docs.
74 RCP<crs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);
// Timing is compiled in only when HAVE_TPETRA_MMM_TIMINGS is defined; the
// timer name is "Tpetra " + label_ + ": " + "Transpose TAFC".
76 #ifdef HAVE_TPETRA_MMM_TIMINGS
77 const std::string prefix = std::string (
"Tpetra ") + label_ +
": ";
78 using Teuchos::TimeMonitor;
79 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix +
"Transpose TAFC"));
// A null exporter means there is nothing to redistribute here, so the
// matrix from createTransposeLocal is returned as-is.
86 RCP<const export_type> exporter =
87 transMatrixWithSharedRows->getGraph ()->getExporter ();
88 if (exporter.is_null ()) {
89 return transMatrixWithSharedRows;
// Parameter list for the export step. "Timer Label" is set only in timing
// builds.
92 Teuchos::ParameterList labelList;
93 #ifdef HAVE_TPETRA_MMM_TIMINGS
94 labelList.set(
"Timer Label", label_);
// Forward the caller's "compute global constants" setting, passing `true`
// as the default argument to params->get. Skipped when params is null.
96 if(! params.is_null ()) {
97 const char paramName[] =
"compute global constants";
98 labelList.set (paramName, params->get (paramName,
true));
// Redistribute through the exporter. The two Teuchos::null arguments and
// the local labelList (wrapped with Teuchos::rcpFromRef) are passed after
// the exporter.
103 return exportAndFillCompleteCrsMatrix<crs_matrix_type>
104 (transMatrixWithSharedRows, *exporter, Teuchos::null,
105 Teuchos::null, Teuchos::rcpFromRef (labelList));
// Builds a transpose of origMatrix_ from this process's local data and
// returns it as an RCP to a CrsMatrix (presumably createTransposeLocal; the
// line carrying the function name is not visible here).
//
// Visible steps:
//   1. Read the "sort" option from `params` (default true).
//   2. Obtain the input as a crs_matrix_type (crsMatrix), with a fallback
//      path when that handle is null.
//   3. Count, per local column, how many entries the local matrix has.
//   4. Allocate offsets / column indices / values for the transpose.
//   5. Scatter every (row, col, value) entry into the slot range of `col`.
//   6. Sort kernel over the rows of the transpose.
//   7. Wrap the arrays in a local_matrix_type and build the result with the
//      original's Maps and Import/Export objects in swapped roles.
//
// NOTE(review): several source lines (kernel launch calls, closing braces,
// some declarations) are missing from this excerpt; comments below describe
// only what the visible lines establish and flag the gaps.
114 template<
class Scalar,
118 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
122 using Teuchos::Array;
123 using Teuchos::ArrayRCP;
124 using Teuchos::ArrayView;
127 using Teuchos::rcp_dynamic_cast;
128 using LO = LocalOrdinal;
129 using GO = GlobalOrdinal;
// Timing is compiled in only when HAVE_TPETRA_MMM_TIMINGS is defined; the
// timer name is "Tpetra " + label_ + ": " + "Transpose Local".
134 #ifdef HAVE_TPETRA_MMM_TIMINGS
135 std::string prefix = std::string(
"Tpetra ") + label_ +
": ";
136 using Teuchos::TimeMonitor;
137 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix +
"Transpose Local"));
// `sort` is true when params is null; otherwise it is the "sort" entry of
// params, with true passed as the default argument to get.
140 const bool sort = [&] () {
141 constexpr
bool sortDefault =
true;
142 const char sortParamName[] =
"sort";
143 return params.get () ==
nullptr ? sortDefault :
144 params->get (sortParamName, sortDefault);
// Local dimensions of the input. The transpose built below has lclNumCols
// local rows and the same number of entries (nnz).
147 const LO lclNumCols (origMatrix_->getNodeNumCols ());
148 const LO lclNumRows (origMatrix_->getNodeNumRows ());
149 const size_t nnz (origMatrix_->getNodeNumEntries ());
// crsMatrix is the input viewed as a crs_matrix_type. Its initializer is not
// visible here (presumably an rcp_dynamic_cast of origMatrix_ — TODO
// confirm). When it is null and the row Map is one-to-one, the visible lines
// build a replacement: per-row entry counts are read from origMatrix_, a
// new matrix crsMatrix_nc is created, an import_type from rowMap to rowMap
// is constructed, and crsMatrix_nc is fill-completed with the original's
// domain and range Maps. The lines that actually copy the data are missing.
151 RCP<const crs_matrix_type> crsMatrix =
153 if (crsMatrix.is_null ()) {
155 if (rowMap->isOneToOne ()) {
156 Teuchos::Array<size_t> numEntPerRow (lclNumRows);
157 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
158 numEntPerRow[lclRow] = origMatrix_->getNumEntriesInLocalRow (lclRow);
160 auto colMap = origMatrix_->getColMap ();
162 RCP<crs_matrix_type> crsMatrix_nc =
166 import_type imp (rowMap, rowMap);
168 crsMatrix_nc->fillComplete (origMatrix_->getDomainMap (),
169 origMatrix_->getRangeMap ());
170 crsMatrix = crsMatrix_nc;
// Unconditional assertion failure. NOTE(review): presumably this is the
// branch for a row Map that is not one-to-one — the enclosing `else` is not
// visible; confirm.
173 TEUCHOS_ASSERT(
false );
// Non-const view types and the execution space, all taken from the local
// matrix / graph types.
179 using offset_type =
typename local_graph_type::size_type;
180 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
181 using index_type =
typename local_matrix_type::index_type::non_const_type;
182 using values_type =
typename local_matrix_type::values_type::non_const_type;
183 using execution_space =
typename local_matrix_type::execution_space;
185 local_matrix_type lclMatrix = crsMatrix->getLocalMatrix ();
186 local_graph_type lclGraph = lclMatrix.graph;
// Counting kernel: t_counts has one entry per local column. For every entry
// of every local row, the count of that entry's column is incremented with
// Kokkos::atomic_fetch_add. The kernel launch call itself is not visible
// (presumably Kokkos::parallel_for over range_type (0, lclNumRows)).
190 Kokkos::View<LO*, DT> t_counts (
"transpose row counts", lclNumCols);
191 using range_type = Kokkos::RangePolicy<LO, execution_space>;
193 (
"Compute row counts of local transpose",
194 range_type (0, lclNumRows),
195 KOKKOS_LAMBDA (
const LO row) {
196 auto rowView = lclGraph.rowConst(row);
197 const LO length = rowView.length;
199 for (LO colID = 0; colID < length; ++colID) {
200 const LO col = rowView(colID);
201 Kokkos::atomic_fetch_add (&t_counts[col], LO (1));
// Storage for the transpose, allocated WithoutInitializing: t_offsets has
// lclNumCols + 1 entries; the two views sized nnz are labeled
// "transpose lcl ind" and "transpose val" (their declarations, and the code
// that fills t_offsets from t_counts, are not visible in this excerpt).
205 using Kokkos::view_alloc;
206 using Kokkos::WithoutInitializing;
207 row_map_type t_offsets
208 (view_alloc (
"transpose ptr", WithoutInitializing), lclNumCols + 1);
215 (view_alloc (
"transpose lcl ind", WithoutInitializing), nnz);
217 (view_alloc (
"transpose val", WithoutInitializing), nnz);
// Scatter kernel: for each entry (row, col, value) of the input, pick a slot
// inside [t_offsets[col], t_offsets[col+1]) and write `row` as the column
// index and the entry's value there.
219 (
"Compute local transpose",
220 range_type (0, lclNumRows),
221 KOKKOS_LAMBDA (
const LO row) {
222 auto rowView = lclMatrix.rowConst(row);
223 const LO length = rowView.length;
225 for (LO colID = 0; colID < length; colID++) {
226 const LO col = rowView.colidx(colID);
// Slot selection: t_counts[col] is atomically decremented, and the slot is
// beg + (len - old_count), where len is the size of col's range.
// NOTE(review): old_count is declared on a line not visible here; presumably
// it is the value returned by the atomic_fetch_sub below — confirm.
227 const offset_type beg = t_offsets[col];
229 Kokkos::atomic_fetch_sub (&t_counts[col], LO (1));
230 const LO len (t_offsets[col+1] - beg);
231 const offset_type insert_pos = beg + (len - old_count);
232 t_cols[insert_pos] = row;
233 t_vals[insert_pos] = rowView.value(colID);
// Sort kernel: one iteration per row of the transpose (lclNumCols of them).
// cols_beg / vals_beg point at the start of that row's indices and values,
// and len is its length. NOTE(review): the guard on `sort` and the actual
// sort call are not visible here.
246 (
"Sort rows of local transpose",
247 range_type (0, lclNumCols),
248 KOKKOS_LAMBDA (
const LO lclCol) {
249 const offset_type beg = t_offsets[lclCol];
250 const LO len (t_offsets[lclCol+1] - t_offsets[lclCol]);
252 LO* cols_beg = t_cols.data () + beg;
253 IST* vals_beg = t_vals.data () + beg;
// Wrap the three arrays in a local matrix labeled "transpose". lclNumCols is
// the first dimension argument; one argument line is missing from the
// excerpt.
258 local_matrix_type lclTransposeMatrix (
"transpose", lclNumCols,
260 t_vals, t_offsets, t_cols);
// Communication objects swap roles: myImport is built from the original
// graph's Export object and myExport from its Import object. A null
// original stays null.
264 const auto origExport = origMatrix_->getGraph ()->getExporter ();
265 RCP<const import_type> myImport = origExport.is_null () ?
266 Teuchos::null : rcp (
new import_type (*origExport));
267 const auto origImport = origMatrix_->getGraph ()->getImporter ();
268 RCP<const export_type> myExport = origImport.is_null () ?
269 Teuchos::null : rcp (
new export_type (*origImport));
// graphParams starts null and is replaced by a list with "sorted" = false.
// NOTE(review): the condition guarding that replacement is on a line not
// visible here (presumably `if (! sort)`) — confirm.
271 RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
273 graphParams = rcp(
new Teuchos::ParameterList);
274 graphParams->set(
"sorted",
false);
// Arguments for constructing the result (the constructor call line is
// missing): the original's column Map is passed before its row Map, and its
// range Map before its domain Map, followed by the swapped Import/Export
// objects and graphParams.
278 origMatrix_->getColMap (),
279 origMatrix_->getRowMap (),
280 origMatrix_->getRangeMap (),
281 origMatrix_->getDomainMap (),
282 myImport, myExport, graphParams));
// Explicit-instantiation macro: expands to an explicit instantiation of the
// RowMatrixTransposer class template for the given (SCALAR, LO, GO, NODE)
// combination.
290 #define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
291 template class RowMatrixTransposer< SCALAR, LO , GO , NODE >;
RowMatrixTransposer(const Teuchos::RCP< const crs_matrix_type > &origMatrix, const std::string &label=std::string())
Constructor that takes the matrix to transpose.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values...
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
typename crs_graph_type::local_graph_type local_graph_type
The part of the sparse matrix's graph on each MPI process.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input array values.
typename Node::device_type device_type
The Kokkos device type.
Teuchos::RCP< crs_matrix_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
Replace existing values with new values.
Teuchos::RCP< crs_matrix_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.