10 #ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11 #define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
13 #include "Tpetra_CrsGraph.hpp"
14 #include "Tpetra_Export.hpp"
16 #include "Tpetra_Details_makeColMap.hpp"
18 #include "Teuchos_ParameterList.hpp"
19 #include "Teuchos_TimeMonitor.hpp"
20 #include "KokkosSparse_Utils.hpp"
21 #include "KokkosKernels_Handle.hpp"
22 #include "KokkosSparse_spadd.hpp"
27 typename LocalIndicesType,
28 typename GlobalIndicesType,
30 struct ConvertLocalToGlobalFunctor
32 ConvertLocalToGlobalFunctor(
33 const LocalIndicesType& colindsOrig_,
34 const GlobalIndicesType& colindsConverted_,
35 const ColMapType& colmap_) :
36 colindsOrig (colindsOrig_),
37 colindsConverted (colindsConverted_),
40 KOKKOS_INLINE_FUNCTION
void
41 operator() (
const GO i)
const
43 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
45 LocalIndicesType colindsOrig;
46 GlobalIndicesType colindsConverted;
50 template<
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
51 struct ConvertGlobalToLocalFunctor
53 ConvertGlobalToLocalFunctor(LOView& lids_,
const GOView& gids_,
const LocalMap localColMap_)
54 : lids(lids_), gids(gids_), localColMap(localColMap_)
57 KOKKOS_FUNCTION
void operator() (
const GO i)
const
59 lids(i) = localColMap.getLocalElement(gids(i));
64 const LocalMap localColMap;
68 template <
typename size_type,
typename ordinal_type,
69 typename ArowptrsT,
typename BrowptrsT,
typename CrowptrsT,
70 typename AcolindsT,
typename BcolindsT,
typename CcolindsT>
71 struct SortedNumericIndicesOnlyFunctor {
73 SortedNumericIndicesOnlyFunctor(
const ArowptrsT& Arowptrs_,
74 const BrowptrsT& Browptrs_,
75 const CrowptrsT& Crowptrs_,
76 const AcolindsT& Acolinds_,
77 const BcolindsT& Bcolinds_,
78 const CcolindsT& Ccolinds_)
79 : Arowptrs(Arowptrs_),
84 Ccolinds(Ccolinds_) {}
86 KOKKOS_INLINE_FUNCTION
void operator()(
const ordinal_type i)
const
88 const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
93 size_type Arowstart = Arowptrs(i);
94 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
95 size_type Browstart = Browptrs(i);
96 size_type Browlen = Browptrs(i + 1) - Browstart;
97 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
98 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
99 size_type Coffset = Crowptrs(i);
100 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX)
102 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
109 Acol = Acolinds(Arowstart + ai);
117 Bcol = Bcolinds(Browstart + bi);
119 Ccolinds(Coffset) = Ccol;
124 const ArowptrsT Arowptrs;
125 const BrowptrsT Browptrs;
126 const CrowptrsT Crowptrs;
127 const AcolindsT Acolinds;
128 const BcolindsT Bcolinds;
132 template <
typename size_type,
typename ordinal_type,
133 typename ArowptrsT,
typename BrowptrsT,
typename CrowptrsT,
134 typename AcolindsT,
typename BcolindsT,
typename CcolindsT>
135 struct UnsortedNumericIndicesOnlyFunctor {
137 UnsortedNumericIndicesOnlyFunctor(
138 const ArowptrsT Arowptrs_,
const BrowptrsT Browptrs_,
const CrowptrsT Crowptrs_,
139 const AcolindsT Acolinds_,
const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
140 const CcolindsT Apos_,
const CcolindsT Bpos_)
141 : Arowptrs(Arowptrs_),
150 KOKKOS_INLINE_FUNCTION
void operator()(
const ordinal_type i)
const {
151 size_type CrowStart = Crowptrs(i);
152 size_type ArowStart = Arowptrs(i);
153 size_type ArowEnd = Arowptrs(i + 1);
154 size_type BrowStart = Browptrs(i);
155 size_type BrowEnd = Browptrs(i + 1);
157 for (size_type j = ArowStart; j < ArowEnd; j++) {
158 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
161 for (size_type j = BrowStart; j < BrowEnd; j++) {
162 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
165 const ArowptrsT Arowptrs;
166 const BrowptrsT Browptrs;
167 const CrowptrsT Crowptrs;
168 const AcolindsT Acolinds;
169 const BcolindsT Bcolinds;
171 const CcolindsT Apos;
172 const CcolindsT Bpos;
176 template<
class LocalOrdinal,
181 const std::string& label)
182 : origGraph_ (origGraph), label_ (label)
185 template<
class LocalOrdinal,
188 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
190 symmetrize (
const Teuchos::RCP<Teuchos::ParameterList> ¶ms)
193 using device_type =
typename Node::device_type;
194 using execution_space =
typename device_type::execution_space;
195 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
197 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
198 using row_ptrs_array =
typename local_graph_device_type::row_map_type::non_const_type ;
199 using col_inds_array =
typename local_graph_device_type::entries_type::non_const_type;
201 using global_col_inds_array =
typename Kokkos::View<GlobalOrdinal*, device_type>;
203 auto graph = origGraph_;
204 auto domain_map = graph->getDomainMap();
205 auto range_map = graph->getRangeMap();
206 auto row_map = graph->getRowMap();
207 auto col_map = graph->getColMap();
208 RCP<const map_type> col_map_sym;
209 RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
211 TEUCHOS_ASSERT(domain_map->isSameAs(*range_map));
212 TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
215 RCP<crs_graph_type> graphT = createTranspose (params);
217 auto col_map_T = graphT->getColMap();
218 TEUCHOS_ASSERT(!col_map_T.is_null());
219 TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
221 bool graphSorted = graph->isSorted();
222 bool graphTSorted = graphT->isSorted();
223 bool sorted = graphSorted && graphTSorted;
224 bool matchingColMaps = col_map->isSameAs(*col_map_T);
226 auto lclGraph = graph->getLocalGraphDevice();
227 auto lclGraphT = graphT->getLocalGraphDevice();
229 using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
230 typename Node::execution_space,
typename Node::memory_space,
typename Node::memory_space>;
231 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
232 typename Node::execution_space,
typename Node::memory_space,
typename Node::memory_space>;
234 auto rowptrs = lclGraph.row_map;
235 auto rowptrsT = lclGraphT.row_map;
236 auto colinds = lclGraph.entries;
237 auto colindsT = lclGraphT.entries;
239 auto nrows = rowptrs.extent(0) - 1;
240 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"row ptrs sym"), nrows + 1);
242 col_inds_array colindsSym;
244 if(!matchingColMaps) {
246 auto lclColmap = col_map->getLocalMap();
247 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"colinds (converted)"), colinds.extent(0));
248 ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convert(colinds, colindsConverted, lclColmap);
249 Kokkos::parallel_for(
"colInds (converted)", range_type(0, colinds.extent(0)), convert);
252 auto lclColmapT = col_map_T->getLocalMap();
253 global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing(
"colindsT (converted)"), colindsT.extent(0));
254 ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convertT(colindsT, colindsTConverted, lclColmapT);
255 Kokkos::parallel_for(
"colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
259 handle.create_spadd_handle(
false);
260 auto addHandle = handle.get_spadd_handle();
262 global_col_inds_array globalColindsSym;
264 KokkosSparse::Experimental::spadd_symbolic
266 #
if KOKKOSKERNELS_VERSION >= 40299
267 nrows, graph->getGlobalNumCols(),
269 rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
270 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"global colinds sym"), addHandle->get_c_nnz());
272 UnsortedNumericIndicesOnlyFunctor<
273 size_t, GlobalOrdinal,
274 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
275 typename global_col_inds_array::const_type,
typename global_col_inds_array::const_type, global_col_inds_array>
276 unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
277 colindsConverted, colindsTConverted, globalColindsSym,
278 addHandle->get_a_pos(), addHandle->get_b_pos());
279 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputNotSorted",
280 range_type(0, nrows), unsortedNumeric);
283 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
284 (col_map_sym, domain_map, globalColindsSym);
287 auto lclColmapSym = col_map_sym->getLocalMap();
288 colindsSym = col_inds_array(
"colindsSym", globalColindsSym.extent(0));
289 ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal, col_inds_array, global_col_inds_array, typename map_type::local_map_type> convertSym(colindsSym, globalColindsSym, lclColmapSym);
290 Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
296 handle.create_spadd_handle(sorted);
297 auto addHandle = handle.get_spadd_handle();
299 KokkosSparse::Experimental::spadd_symbolic
301 #
if KOKKOSKERNELS_VERSION >= 40299
302 nrows, graph->getGlobalNumCols(),
304 rowptrs, colinds, rowptrsT, colindsT, rowptrsSym);
305 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
308 SortedNumericIndicesOnlyFunctor<
309 size_t, LocalOrdinal,
310 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
311 typename col_inds_array::const_type,
typename col_inds_array::const_type, col_inds_array>
312 sortedNumeric(rowptrs, rowptrsT, rowptrsSym,
313 colinds, colindsT, colindsSym);
314 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputSorted",
315 range_type(0, nrows), sortedNumeric);
318 UnsortedNumericIndicesOnlyFunctor<
319 size_t, LocalOrdinal,
320 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
321 typename col_inds_array::const_type,
typename col_inds_array::const_type, col_inds_array>
322 unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
323 colinds, colindsT, colindsSym,
324 addHandle->get_a_pos(), addHandle->get_b_pos());
325 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputNotSorted",
326 range_type(0, nrows), unsortedNumeric);
330 col_map_sym = col_map;
331 importer = graph->getImporter();
336 KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
338 local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
340 RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
342 graphParams = rcp(
new Teuchos::ParameterList);
343 graphParams->set(
"sorted",
false);
356 template<
class LocalOrdinal,
359 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
365 RCP<crs_graph_type> transGraphWithSharedRows = createTransposeLocal (params);
367 #ifdef HAVE_TPETRA_MMM_TIMINGS
368 const std::string prefix = std::string (
"Tpetra ") + label_ +
": ";
369 using Teuchos::TimeMonitor;
370 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix +
"Transpose TAFC"));
377 RCP<const export_type> exporter =
378 transGraphWithSharedRows->getExporter ();
379 if (exporter.is_null ()) {
380 return transGraphWithSharedRows;
383 Teuchos::ParameterList labelList;
384 #ifdef HAVE_TPETRA_MMM_TIMINGS
385 labelList.set(
"Timer Label", label_);
387 if(! params.is_null ()) {
388 const char paramName[] =
"compute global constants";
389 labelList.set (paramName, params->get (paramName,
true));
394 return exportAndFillCompleteCrsGraph<crs_graph_type>
395 (transGraphWithSharedRows, *exporter, Teuchos::null,
396 Teuchos::null, Teuchos::rcpFromRef (labelList));
400 template<
class LocalOrdinal,
403 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
409 using Teuchos::rcp_dynamic_cast;
410 using LO = LocalOrdinal;
411 using GO = GlobalOrdinal;
415 #ifdef HAVE_TPETRA_MMM_TIMINGS
416 std::string prefix = std::string(
"Tpetra ") + label_ +
": ";
417 using Teuchos::TimeMonitor;
418 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix +
"Transpose Local"));
421 const bool sort = [&] () {
422 constexpr
bool sortDefault =
true;
423 const char sortParamName[] =
"sort";
424 return params.get () ==
nullptr ? sortDefault :
425 params->get (sortParamName, sortDefault);
429 local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice ();
432 using c_rowmap_t =
typename local_graph_device_type::row_map_type;
433 using c_entries_t =
typename local_graph_device_type::entries_type;
434 using rowmap_t =
typename local_graph_device_type::row_map_type::non_const_type;
435 using entries_t =
typename local_graph_device_type::entries_type::non_const_type;
436 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
437 rowmap_t lclGraphT_rowmap(
"Transpose rowmap", numCols + 1);
438 entries_t lclGraphT_entries(
439 Kokkos::ViewAllocateWithoutInitializing(
"Transpose entries"), lclGraph.entries.extent(0));
440 KokkosSparse::Impl::transpose_graph<
441 c_rowmap_t, c_entries_t,
443 rowmap_t,
typename local_graph_device_type::execution_space>(
444 lclGraph.numRows(), numCols,
445 lclGraph.row_map, lclGraph.entries,
446 lclGraphT_rowmap, lclGraphT_entries);
449 KokkosSparse::sort_crs_graph<
450 typename local_graph_device_type::execution_space,
451 rowmap_t, entries_t>(
456 local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
460 const auto origExport = origGraph_->getExporter ();
461 RCP<const import_type> myImport = origExport.is_null () ?
462 Teuchos::null : rcp (
new import_type (*origExport));
463 const auto origImport = origGraph_->getImporter ();
464 RCP<const export_type> myExport = origImport.is_null () ?
465 Teuchos::null : rcp (
new export_type (*origImport));
467 RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
469 graphParams = rcp(
new Teuchos::ParameterList);
470 graphParams->set(
"sorted",
false);
474 origGraph_->getColMap (),
475 origGraph_->getRowMap (),
476 origGraph_->getRangeMap (),
477 origGraph_->getDomainMap (),
478 myImport, myExport, graphParams));
487 #define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO,GO,NODE) \
488 template class CrsGraphTransposer< LO , GO , NODE >;
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
"Local" part of Map suitable for Kokkos kernels.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.