42 #ifndef TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
43 #define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_Export.hpp"
47 #include "Tpetra_Import.hpp"
48 #include "Teuchos_ParameterList.hpp"
49 #include "Teuchos_TimeMonitor.hpp"
53 template<
class Scalar,
59 : origMatrix_(origMatrix), label_(label) {}
61 template<
class Scalar,
65 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
71 RCP<crs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);
73 #ifdef HAVE_TPETRA_MMM_TIMINGS
74 std::string prefix = std::string(
"Tpetra ")+ label_ + std::string(
": ");
75 using Teuchos::TimeMonitor;
76 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"Transpose TAFC"))));
82 RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
83 transMatrixWithSharedRows->getGraph ()->getExporter ();
84 if (exporter.is_null ()) {
85 return transMatrixWithSharedRows;
88 Teuchos::ParameterList labelList;
89 #ifdef HAVE_TPETRA_MMM_TIMINGS
90 labelList.set(
"Timer Label",label_);
92 if(!params.is_null()) labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
94 return exportAndFillCompleteCrsMatrix<crs_matrix_type> (transMatrixWithSharedRows, *exporter,Teuchos::null,Teuchos::null,Teuchos::rcp(&labelList,
false));
103 template<
class Scalar,
107 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
111 using Teuchos::Array;
112 using Teuchos::ArrayRCP;
113 using Teuchos::ArrayView;
116 using Teuchos::rcp_dynamic_cast;
117 typedef LocalOrdinal LO;
118 typedef GlobalOrdinal GO;
122 #ifdef HAVE_TPETRA_MMM_TIMINGS
123 std::string prefix = std::string(
"Tpetra ")+ label_ + std::string(
": ");
124 using Teuchos::TimeMonitor;
125 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"Transpose Local"))));
130 RCP<const import_type> myImport;
131 RCP<const export_type> myExport;
132 if (! origMatrix_->getGraph ()->getImporter ().is_null ()) {
133 myExport = rcp (
new export_type (*origMatrix_->getGraph ()->getImporter ()));
135 if (! origMatrix_->getGraph ()->getExporter ().is_null ()) {
136 myImport = rcp (
new import_type (*origMatrix_->getGraph ()->getExporter ()));
143 size_t numLocalCols = origMatrix_->getNodeNumCols();
144 size_t numLocalRows = origMatrix_->getNodeNumRows();
145 size_t numLocalNnz = origMatrix_->getNodeNumEntries();
147 RCP<const crs_matrix_type> crsMatrix =
150 RCP<crs_matrix_type> transMatrixWithSharedRows;
151 if (crsMatrix != Teuchos::null) {
154 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
155 using index_type =
typename local_matrix_type::index_type::non_const_type;
156 using values_type =
typename local_matrix_type::values_type::non_const_type;
157 using execution_space =
typename local_matrix_type::execution_space;
161 auto lclGraph = lclMatrix.graph;
163 using range_type = Kokkos::RangePolicy<LO,execution_space>;
166 row_map_type t_rows(
"transpose_rows", numLocalCols+1);
167 Kokkos::parallel_for(
"compute_number_of_indices_per_column", range_type(0, numLocalRows),
168 KOKKOS_LAMBDA(
const LO row) {
169 auto rowView = lclGraph.rowConst(row);
170 auto length = rowView.length;
172 for (decltype(length) colID = 0; colID < length; colID++) {
173 auto col = rowView(colID);
174 Kokkos::atomic_fetch_add(&t_rows[col], 1);
179 Kokkos::parallel_scan(
"compute_transpose_row_offsets", range_type(0, numLocalCols+1),
180 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
181 const LO val = t_rows(i);
187 row_map_type offsets(
"transpose_row_offsets_aux", numLocalCols+1);
190 index_type t_cols(
"transpose_cols", numLocalNnz);
191 values_type t_vals(
"transpose_vals", numLocalNnz);
192 Kokkos::parallel_for(
"compute_transposed_rows", range_type(0, numLocalRows),
193 KOKKOS_LAMBDA(
const LO row) {
194 auto rowView = lclMatrix.rowConst(row);
195 auto length = rowView.length;
197 for (decltype(length) colID = 0; colID < length; colID++) {
198 auto col = rowView.colidx(colID);
200 LO insert_pos = Kokkos::atomic_fetch_add(&offsets[col], 1);
202 t_cols[insert_pos] = row;
203 t_vals[insert_pos] = rowView.value(colID);
207 local_matrix_type lclTransposeMatrix(
"transpose", numLocalCols, numLocalRows, numLocalNnz, t_vals, t_rows, t_cols);
209 transMatrixWithSharedRows =
211 origMatrix_->getColMap (), origMatrix_->getRowMap (),
212 origMatrix_->getRangeMap (), origMatrix_->getDomainMap ()));
218 Array<size_t> CurrentStart(numLocalCols,0);
219 ArrayView<const LO> localIndices;
220 ArrayView<const Scalar> localValues;
222 for (
size_t i=0; i<numLocalRows; ++i) {
223 const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow(i);
224 origMatrix_->getLocalRowView(i, localIndices, localValues);
225 for (
size_t j=0; j<numEntriesInRow; ++j) {
226 ++CurrentStart[ localIndices[j] ];
232 ArrayRCP<size_t> rowptr_rcp(numLocalCols+1);
233 ArrayRCP<LO> colind_rcp(numLocalNnz);
234 ArrayRCP<Scalar> values_rcp(numLocalNnz);
237 ArrayView<size_t> TransRowptr = rowptr_rcp();
238 ArrayView<LO> TransColind = colind_rcp();
239 ArrayView<Scalar> TransValues = values_rcp();
243 for (
size_t i=1; i<numLocalCols+1; ++i) TransRowptr[i] = CurrentStart[i-1] + TransRowptr[i-1];
244 for (
size_t i=0; i<numLocalCols; ++i) CurrentStart[i] = TransRowptr[i];
248 for (
size_t i=0; i<numLocalRows; ++i) {
249 const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow (i);
250 origMatrix_->getLocalRowView(i, localIndices, localValues);
252 for (
size_t j=0; j<numEntriesInRow; ++j) {
253 size_t idx = CurrentStart[localIndices[j]];
254 TransColind[idx] = Teuchos::as<LO>(i);
255 TransValues[idx] = localValues[j];
256 ++CurrentStart[localIndices[j]];
261 transMatrixWithSharedRows =
263 origMatrix_->getRowMap (), 0));
264 transMatrixWithSharedRows->setAllValues (rowptr_rcp, colind_rcp, values_rcp);
267 Teuchos::ParameterList eParams;
268 #ifdef HAVE_TPETRA_MMM_TIMINGS
269 eParams.set(
"Timer Label",label_);
271 if (!params.is_null())
272 eParams.set(
"compute global constants", params->get(
"compute global constants",
true));
274 transMatrixWithSharedRows->expertStaticFillComplete (origMatrix_->getRangeMap (),
275 origMatrix_->getDomainMap (),
276 myImport, myExport,rcp(&eParams,
false));
280 return transMatrixWithSharedRows;
288 #define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
289 template class RowMatrixTransposer< SCALAR, LO , GO , NODE >;
RowMatrixTransposer(const Teuchos::RCP< const crs_matrix_type > &origMatrix, const std::string &label=std::string())
Constructor that takes the matrix to transpose.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
Teuchos::RCP< crs_matrix_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
Teuchos::RCP< crs_matrix_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.