Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraphTransposer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11 #define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12 
13 #include "Tpetra_CrsGraph.hpp"
14 #include "Tpetra_Export.hpp"
16 #include "Tpetra_Details_makeColMap.hpp"
18 #include "Teuchos_ParameterList.hpp"
19 #include "Teuchos_TimeMonitor.hpp"
20 #include "KokkosSparse_Utils.hpp"
21 #include "KokkosKernels_Handle.hpp"
22 #include "KokkosSparse_spadd.hpp"
23 
24 namespace Tpetra {
25 
26 template <typename GO,
27  typename LocalIndicesType,
28  typename GlobalIndicesType,
29  typename ColMapType>
30 struct ConvertLocalToGlobalFunctor {
31  ConvertLocalToGlobalFunctor(
32  const LocalIndicesType& colindsOrig_,
33  const GlobalIndicesType& colindsConverted_,
34  const ColMapType& colmap_)
35  : colindsOrig(colindsOrig_)
36  , colindsConverted(colindsConverted_)
37  , colmap(colmap_) {}
38  KOKKOS_INLINE_FUNCTION void
39  operator()(const GO i) const {
40  colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
41  }
42  LocalIndicesType colindsOrig;
43  GlobalIndicesType colindsConverted;
44  ColMapType colmap;
45 };
46 
47 template <class LO, class GO, class LOView, class GOView, class LocalMap>
48 struct ConvertGlobalToLocalFunctor {
49  ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
50  : lids(lids_)
51  , gids(gids_)
52  , localColMap(localColMap_) {}
53 
54  KOKKOS_FUNCTION void operator()(const GO i) const {
55  lids(i) = localColMap.getLocalElement(gids(i));
56  }
57 
58  LOView lids;
59  const GOView gids;
60  const LocalMap localColMap;
61 };
62 
63 template <typename size_type, typename ordinal_type,
64  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
65  typename AcolindsT, typename BcolindsT, typename CcolindsT>
66 struct SortedNumericIndicesOnlyFunctor {
67  SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
68  const BrowptrsT& Browptrs_,
69  const CrowptrsT& Crowptrs_,
70  const AcolindsT& Acolinds_,
71  const BcolindsT& Bcolinds_,
72  const CcolindsT& Ccolinds_)
73  : Arowptrs(Arowptrs_)
74  , Browptrs(Browptrs_)
75  , Crowptrs(Crowptrs_)
76  , Acolinds(Acolinds_)
77  , Bcolinds(Bcolinds_)
78  , Ccolinds(Ccolinds_) {}
79 
80  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
81 #if KOKKOS_VERSION >= 40799
82  const ordinal_type ORDINAL_MAX = KokkosKernels::ArithTraits<ordinal_type>::max();
83 #else
84  const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
85 #endif
86 
87  // count the union of nonzeros in Arow and Brow
88  size_type ai = 0;
89  size_type bi = 0;
90  size_type Arowstart = Arowptrs(i);
91  size_type Arowlen = Arowptrs(i + 1) - Arowstart;
92  size_type Browstart = Browptrs(i);
93  size_type Browlen = Browptrs(i + 1) - Browstart;
94  ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
95  ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
96  size_type Coffset = Crowptrs(i);
97  while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX) {
98  ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
99  while (Acol == Ccol) {
100  ai++;
101  if (ai == Arowlen)
102  Acol = ORDINAL_MAX;
103  else
104  Acol = Acolinds(Arowstart + ai);
105  }
106  while (Bcol == Ccol) {
107  bi++;
108  if (bi == Browlen)
109  Bcol = ORDINAL_MAX;
110  else
111  Bcol = Bcolinds(Browstart + bi);
112  }
113  Ccolinds(Coffset) = Ccol;
114  Coffset++;
115  }
116  }
117 
118  const ArowptrsT Arowptrs;
119  const BrowptrsT Browptrs;
120  const CrowptrsT Crowptrs;
121  const AcolindsT Acolinds;
122  const BcolindsT Bcolinds;
123  CcolindsT Ccolinds;
124 };
125 
126 template <typename size_type, typename ordinal_type,
127  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
128  typename AcolindsT, typename BcolindsT, typename CcolindsT>
129 struct UnsortedNumericIndicesOnlyFunctor {
130  UnsortedNumericIndicesOnlyFunctor(
131  const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
132  const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
133  const CcolindsT Apos_, const CcolindsT Bpos_)
134  : Arowptrs(Arowptrs_)
135  , Browptrs(Browptrs_)
136  , Crowptrs(Crowptrs_)
137  , Acolinds(Acolinds_)
138  , Bcolinds(Bcolinds_)
139  , Ccolinds(Ccolinds_)
140  , Apos(Apos_)
141  , Bpos(Bpos_) {}
142 
143  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
144  size_type CrowStart = Crowptrs(i);
145  size_type ArowStart = Arowptrs(i);
146  size_type ArowEnd = Arowptrs(i + 1);
147  size_type BrowStart = Browptrs(i);
148  size_type BrowEnd = Browptrs(i + 1);
149  // add in A entries, while setting C colinds
150  for (size_type j = ArowStart; j < ArowEnd; j++) {
151  Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
152  }
153  // add in B entries, while setting C colinds
154  for (size_type j = BrowStart; j < BrowEnd; j++) {
155  Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
156  }
157  }
158  const ArowptrsT Arowptrs;
159  const BrowptrsT Browptrs;
160  const CrowptrsT Crowptrs;
161  const AcolindsT Acolinds;
162  const BcolindsT Bcolinds;
163  CcolindsT Ccolinds;
164  const CcolindsT Apos;
165  const CcolindsT Bpos;
166 };
167 
168 template <class LocalOrdinal,
169  class GlobalOrdinal,
170  class Node>
172  CrsGraphTransposer(const Teuchos::RCP<const crs_graph_type>& origGraph,
173  const std::string& label)
174  : origGraph_(origGraph)
175  , label_(label) {}
176 
177 template <class LocalOrdinal,
178  class GlobalOrdinal,
179  class Node>
180 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
182  symmetrize(const Teuchos::RCP<Teuchos::ParameterList>& params) {
183  using Teuchos::RCP;
184  using device_type = typename Node::device_type;
185  using execution_space = typename device_type::execution_space;
186  using range_type = Kokkos::RangePolicy<execution_space, size_t>;
187  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
188  using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
189  using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type;
190  using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
191  using local_map_type = typename map_type::local_map_type;
192  using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
193 
194  auto graph = origGraph_;
195  auto domain_map = graph->getDomainMap();
196  auto range_map = graph->getRangeMap();
197  auto row_map = graph->getRowMap();
198  auto col_map = graph->getColMap();
199  RCP<const map_type> col_map_sym;
200  RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > importer;
201 
202  TEUCHOS_ASSERT(domain_map->isSameAs(*range_map));
203  TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
204 
205  // Do the transpose
206  RCP<crs_graph_type> graphT = createTranspose(params);
207 
208  auto col_map_T = graphT->getColMap();
209  TEUCHOS_ASSERT(!col_map_T.is_null());
210  TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
211 
212  bool graphSorted = graph->isSorted();
213  bool graphTSorted = graphT->isSorted();
214  bool sorted = graphSorted && graphTSorted;
215  bool matchingColMaps = col_map->isSameAs(*col_map_T);
216 
217  auto lclGraph = graph->getLocalGraphDevice();
218  auto lclGraphT = graphT->getLocalGraphDevice();
219 
220  using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
221  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
222  using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
223  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
224 
225  auto rowptrs = lclGraph.row_map;
226  auto rowptrsT = lclGraphT.row_map;
227  auto colinds = lclGraph.entries;
228  auto colindsT = lclGraphT.entries;
229 
230  auto nrows = rowptrs.extent(0) - 1;
231  auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
232 
233  col_inds_array colindsSym;
234 
235  if (!matchingColMaps) {
236  // convert indices of local graph to GlobalOrdinal
237  auto lclColmap = col_map->getLocalMap();
238  global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
239  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convert(colinds, colindsConverted, lclColmap);
240  Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
241 
242  // convert indices of local graphT to GlobalOrdinal
243  auto lclColmapT = col_map_T->getLocalMap();
244  global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
245  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convertT(colindsT, colindsTConverted, lclColmapT);
246  Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
247 
248  // sum graph and graphT in GlobalOrdinal
249  KKH_GO handle;
250  handle.create_spadd_handle(false);
251  auto addHandle = handle.get_spadd_handle();
252 
253  global_col_inds_array globalColindsSym;
254 
255  KokkosSparse::spadd_symbolic(&handle,
256  nrows, graph->getGlobalNumCols(),
257  rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
258  globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
259 
260  UnsortedNumericIndicesOnlyFunctor<
261  size_t, GlobalOrdinal,
262  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
263  typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
264  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
265  colindsConverted, colindsTConverted, globalColindsSym,
266  addHandle->get_a_pos(), addHandle->get_b_pos());
267  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
268  range_type(0, nrows), unsortedNumeric);
269 
270  // build column map for graphSym
271  Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>(col_map_sym, domain_map, globalColindsSym);
272 
273  // convert indices of local graphSym to LocalOrdinal
274  auto lclColmapSym = col_map_sym->getLocalMap();
275  colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
276  ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal, col_inds_array, global_col_inds_array, typename map_type::local_map_type> convertSym(colindsSym, globalColindsSym, lclColmapSym);
277  Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
278 
279  } else {
280  // sum graph and graphT in LocalOrdinal
281  KKH_LO handle;
282  handle.create_spadd_handle(sorted);
283  auto addHandle = handle.get_spadd_handle();
284 
285  KokkosSparse::spadd_symbolic(&handle,
286  nrows, graph->getGlobalNumCols(),
287  rowptrs, colinds, rowptrsT, colindsT, rowptrsSym);
288  colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
289 
290  if (sorted) {
291  SortedNumericIndicesOnlyFunctor<
292  size_t, LocalOrdinal,
293  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
294  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
295  sortedNumeric(rowptrs, rowptrsT, rowptrsSym,
296  colinds, colindsT, colindsSym);
297  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
298  range_type(0, nrows), sortedNumeric);
299 
300  } else {
301  UnsortedNumericIndicesOnlyFunctor<
302  size_t, LocalOrdinal,
303  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
304  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
305  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
306  colinds, colindsT, colindsSym,
307  addHandle->get_a_pos(), addHandle->get_b_pos());
308  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
309  range_type(0, nrows), unsortedNumeric);
310  }
311 
312  // column map for graphSym is graph's column map
313  col_map_sym = col_map;
314  importer = graph->getImporter();
315  }
316 
317  bool sort = true;
318  if (sort)
319  KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
320 
321  local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
322 
323  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
324  if (!sort) {
325  graphParams = rcp(new Teuchos::ParameterList);
326  graphParams->set("sorted", false);
327  }
328 
329  return rcp(new crs_graph_type(lclGraphSym,
330  row_map,
331  col_map_sym,
332  domain_map,
333  range_map,
334  importer,
335  Teuchos::null,
336  graphParams));
337 }
338 
339 template <class LocalOrdinal,
340  class GlobalOrdinal,
341  class Node>
342 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
344  createTranspose(const Teuchos::RCP<Teuchos::ParameterList>& params) {
345  using Teuchos::RCP;
346  // Do the local transpose
347  RCP<crs_graph_type> transGraphWithSharedRows = createTransposeLocal(params);
348 
349 #ifdef HAVE_TPETRA_MMM_TIMINGS
350  const std::string prefix = std::string("Tpetra ") + label_ + ": ";
351  using Teuchos::TimeMonitor;
352  TimeMonitor MM(*TimeMonitor::getNewTimer(prefix + "Transpose TAFC"));
353 #endif
354 
355  // If transGraphWithSharedRows has an exporter, that's what we
356  // want. If it doesn't, the rows aren't actually shared, and we're
357  // done!
358  using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
359  RCP<const export_type> exporter =
360  transGraphWithSharedRows->getExporter();
361  if (exporter.is_null()) {
362  return transGraphWithSharedRows;
363  } else {
364  Teuchos::ParameterList labelList;
365 #ifdef HAVE_TPETRA_MMM_TIMINGS
366  labelList.set("Timer Label", label_);
367 #endif
368  if (!params.is_null()) {
369  const char paramName[] = "compute global constants";
370  labelList.set(paramName, params->get(paramName, true));
371  }
372  // Use the Export object to do a fused Export and fillComplete.
373  // This always sorts the local graph after communication, so
374  // no need to set "sorted = false" in parameters.
375  return exportAndFillCompleteCrsGraph<crs_graph_type>(transGraphWithSharedRows, *exporter, Teuchos::null,
376  Teuchos::null, Teuchos::rcpFromRef(labelList));
377  }
378 }
379 
380 template <class LocalOrdinal,
381  class GlobalOrdinal,
382  class Node>
383 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
385  createTransposeLocal(const Teuchos::RCP<Teuchos::ParameterList>& params) {
386  using Teuchos::RCP;
387  using Teuchos::rcp;
388  using Teuchos::rcp_dynamic_cast;
389  using LO = LocalOrdinal;
390  using GO = GlobalOrdinal;
391  using import_type = Tpetra::Import<LO, GO, Node>;
392  using export_type = Tpetra::Export<LO, GO, Node>;
393 
394 #ifdef HAVE_TPETRA_MMM_TIMINGS
395  std::string prefix = std::string("Tpetra ") + label_ + ": ";
396  using Teuchos::TimeMonitor;
397  TimeMonitor MM(*TimeMonitor::getNewTimer(prefix + "Transpose Local"));
398 #endif
399 
400  const bool sort = [&]() {
401  constexpr bool sortDefault = true; // see #4607 discussion
402  const char sortParamName[] = "sort";
403  return params.get() == nullptr ? sortDefault : params->get(sortParamName, sortDefault);
404  }();
405 
406  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
407  local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice();
408 
409  // Allocate views and call the other version of transpose_graph
410  using c_rowmap_t = typename local_graph_device_type::row_map_type;
411  using c_entries_t = typename local_graph_device_type::entries_type;
412  using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
413  using entries_t = typename local_graph_device_type::entries_type::non_const_type;
414  LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
415  rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
416  entries_t lclGraphT_entries(
417  Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
418  KokkosSparse::Impl::transpose_graph<
419  c_rowmap_t, c_entries_t,
420  rowmap_t, entries_t,
421  rowmap_t, typename local_graph_device_type::execution_space>(
422  lclGraph.numRows(), numCols,
423  lclGraph.row_map, lclGraph.entries,
424  lclGraphT_rowmap, lclGraphT_entries);
425 
426  if (sort)
427  KokkosSparse::sort_crs_graph<
428  typename local_graph_device_type::execution_space,
429  rowmap_t, entries_t>(
430  lclGraphT_rowmap,
431  lclGraphT_entries);
432 
433  // And construct the transpose local_graph_device_type
434  local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
435 
436  // Prebuild the importers and exporters the no-communication way,
437  // flipping the importers and exporters around.
438  const auto origExport = origGraph_->getExporter();
439  RCP<const import_type> myImport = origExport.is_null() ? Teuchos::null : rcp(new import_type(*origExport));
440  const auto origImport = origGraph_->getImporter();
441  RCP<const export_type> myExport = origImport.is_null() ? Teuchos::null : rcp(new export_type(*origImport));
442 
443  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
444  if (!sort) {
445  graphParams = rcp(new Teuchos::ParameterList);
446  graphParams->set("sorted", false);
447  }
448 
449  return rcp(new crs_graph_type(lclGraphT,
450  origGraph_->getColMap(),
451  origGraph_->getRowMap(),
452  origGraph_->getRangeMap(),
453  origGraph_->getDomainMap(),
454  myImport, myExport, graphParams));
455 }
456 
457 //
458 // Explicit instantiation macro
459 //
460 // Must be expanded from within the Tpetra namespace!
461 //
462 
463 #define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO, GO, NODE) \
464  template class CrsGraphTransposer<LO, GO, NODE>;
465 
466 } // namespace Tpetra
467 
468 #endif
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.