Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraphTransposer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11 #define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12 
13 #include "Tpetra_CrsGraph.hpp"
14 #include "Tpetra_Export.hpp"
16 #include "Tpetra_Details_makeColMap.hpp"
18 #include "Teuchos_ParameterList.hpp"
19 #include "Teuchos_TimeMonitor.hpp"
20 #include "KokkosSparse_Utils.hpp"
21 #include "KokkosKernels_Handle.hpp"
22 #include "KokkosSparse_spadd.hpp"
23 
24 namespace Tpetra {
25 
26  template<typename GO,
27  typename LocalIndicesType,
28  typename GlobalIndicesType,
29  typename ColMapType>
30  struct ConvertLocalToGlobalFunctor
31  {
32  ConvertLocalToGlobalFunctor(
33  const LocalIndicesType& colindsOrig_,
34  const GlobalIndicesType& colindsConverted_,
35  const ColMapType& colmap_) :
36  colindsOrig (colindsOrig_),
37  colindsConverted (colindsConverted_),
38  colmap (colmap_)
39  {}
40  KOKKOS_INLINE_FUNCTION void
41  operator() (const GO i) const
42  {
43  colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
44  }
45  LocalIndicesType colindsOrig;
46  GlobalIndicesType colindsConverted;
47  ColMapType colmap;
48  };
49 
50  template<class LO, class GO, class LOView, class GOView, class LocalMap>
51  struct ConvertGlobalToLocalFunctor
52  {
53  ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
54  : lids(lids_), gids(gids_), localColMap(localColMap_)
55  {}
56 
57  KOKKOS_FUNCTION void operator() (const GO i) const
58  {
59  lids(i) = localColMap.getLocalElement(gids(i));
60  }
61 
62  LOView lids;
63  const GOView gids;
64  const LocalMap localColMap;
65  };
66 
67 
68  template <typename size_type, typename ordinal_type,
69  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
70  typename AcolindsT, typename BcolindsT, typename CcolindsT>
71  struct SortedNumericIndicesOnlyFunctor {
72 
73  SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
74  const BrowptrsT& Browptrs_,
75  const CrowptrsT& Crowptrs_,
76  const AcolindsT& Acolinds_,
77  const BcolindsT& Bcolinds_,
78  const CcolindsT& Ccolinds_)
79  : Arowptrs(Arowptrs_),
80  Browptrs(Browptrs_),
81  Crowptrs(Crowptrs_),
82  Acolinds(Acolinds_),
83  Bcolinds(Bcolinds_),
84  Ccolinds(Ccolinds_) {}
85 
86  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const
87  {
88  const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
89 
90  // count the union of nonzeros in Arow and Brow
91  size_type ai = 0;
92  size_type bi = 0;
93  size_type Arowstart = Arowptrs(i);
94  size_type Arowlen = Arowptrs(i + 1) - Arowstart;
95  size_type Browstart = Browptrs(i);
96  size_type Browlen = Browptrs(i + 1) - Browstart;
97  ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
98  ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
99  size_type Coffset = Crowptrs(i);
100  while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX)
101  {
102  ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
103  while(Acol == Ccol)
104  {
105  ai++;
106  if(ai == Arowlen)
107  Acol = ORDINAL_MAX;
108  else
109  Acol = Acolinds(Arowstart + ai);
110  }
111  while(Bcol == Ccol)
112  {
113  bi++;
114  if(bi == Browlen)
115  Bcol = ORDINAL_MAX;
116  else
117  Bcol = Bcolinds(Browstart + bi);
118  }
119  Ccolinds(Coffset) = Ccol;
120  Coffset++;
121  }
122  }
123 
124  const ArowptrsT Arowptrs;
125  const BrowptrsT Browptrs;
126  const CrowptrsT Crowptrs;
127  const AcolindsT Acolinds;
128  const BcolindsT Bcolinds;
129  CcolindsT Ccolinds;
130  };
131 
132  template <typename size_type, typename ordinal_type,
133  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
134  typename AcolindsT, typename BcolindsT, typename CcolindsT>
135  struct UnsortedNumericIndicesOnlyFunctor {
136 
137  UnsortedNumericIndicesOnlyFunctor(
138  const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
139  const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
140  const CcolindsT Apos_, const CcolindsT Bpos_)
141  : Arowptrs(Arowptrs_),
142  Browptrs(Browptrs_),
143  Crowptrs(Crowptrs_),
144  Acolinds(Acolinds_),
145  Bcolinds(Bcolinds_),
146  Ccolinds(Ccolinds_),
147  Apos(Apos_),
148  Bpos(Bpos_) {}
149 
150  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
151  size_type CrowStart = Crowptrs(i);
152  size_type ArowStart = Arowptrs(i);
153  size_type ArowEnd = Arowptrs(i + 1);
154  size_type BrowStart = Browptrs(i);
155  size_type BrowEnd = Browptrs(i + 1);
156  // add in A entries, while setting C colinds
157  for (size_type j = ArowStart; j < ArowEnd; j++) {
158  Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
159  }
160  // add in B entries, while setting C colinds
161  for (size_type j = BrowStart; j < BrowEnd; j++) {
162  Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
163  }
164  }
165  const ArowptrsT Arowptrs;
166  const BrowptrsT Browptrs;
167  const CrowptrsT Crowptrs;
168  const AcolindsT Acolinds;
169  const BcolindsT Bcolinds;
170  CcolindsT Ccolinds;
171  const CcolindsT Apos;
172  const CcolindsT Bpos;
173  };
174 
175 
176  template<class LocalOrdinal,
177  class GlobalOrdinal,
178  class Node>
180  CrsGraphTransposer (const Teuchos::RCP<const crs_graph_type>& origGraph,
181  const std::string& label)
182  : origGraph_ (origGraph), label_ (label)
183  {}
184 
185  template<class LocalOrdinal,
186  class GlobalOrdinal,
187  class Node>
188  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
190  symmetrize (const Teuchos::RCP<Teuchos::ParameterList> &params)
191  {
192  using Teuchos::RCP;
193  using device_type = typename Node::device_type;
194  using execution_space = typename device_type::execution_space;
195  using range_type = Kokkos::RangePolicy<execution_space, size_t>;
196  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
197  using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
198  using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type ;
199  using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
200  using local_map_type = typename map_type::local_map_type;
201  using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
202 
203  auto graph = origGraph_;
204  auto domain_map = graph->getDomainMap();
205  auto range_map = graph->getRangeMap();
206  auto row_map = graph->getRowMap();
207  auto col_map = graph->getColMap();
208  RCP<const map_type> col_map_sym;
209  RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
210 
211  TEUCHOS_ASSERT(domain_map->isSameAs(*range_map));
212  TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
213 
214  // Do the transpose
215  RCP<crs_graph_type> graphT = createTranspose (params);
216 
217  auto col_map_T = graphT->getColMap();
218  TEUCHOS_ASSERT(!col_map_T.is_null());
219  TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
220 
221  bool graphSorted = graph->isSorted();
222  bool graphTSorted = graphT->isSorted();
223  bool sorted = graphSorted && graphTSorted;
224  bool matchingColMaps = col_map->isSameAs(*col_map_T);
225 
226  auto lclGraph = graph->getLocalGraphDevice();
227  auto lclGraphT = graphT->getLocalGraphDevice();
228 
229  using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
230  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
231  using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
232  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
233 
234  auto rowptrs = lclGraph.row_map;
235  auto rowptrsT = lclGraphT.row_map;
236  auto colinds = lclGraph.entries;
237  auto colindsT = lclGraphT.entries;
238 
239  auto nrows = rowptrs.extent(0) - 1;
240  auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
241 
242  col_inds_array colindsSym;
243 
244  if(!matchingColMaps) {
245  // convert indices of local graph to GlobalOrdinal
246  auto lclColmap = col_map->getLocalMap();
247  global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
248  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convert(colinds, colindsConverted, lclColmap);
249  Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
250 
251  // convert indices of local graphT to GlobalOrdinal
252  auto lclColmapT = col_map_T->getLocalMap();
253  global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
254  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convertT(colindsT, colindsTConverted, lclColmapT);
255  Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
256 
257  // sum graph and graphT in GlobalOrdinal
258  KKH_GO handle;
259  handle.create_spadd_handle(false);
260  auto addHandle = handle.get_spadd_handle();
261 
262  global_col_inds_array globalColindsSym;
263 
264  KokkosSparse::Experimental::spadd_symbolic
265  (&handle,
266 #if KOKKOSKERNELS_VERSION >= 40299
267  nrows, graph->getGlobalNumCols(),
268 #endif
269  rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
270  globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
271 
272  UnsortedNumericIndicesOnlyFunctor<
273  size_t, GlobalOrdinal,
274  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
275  typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
276  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
277  colindsConverted, colindsTConverted, globalColindsSym,
278  addHandle->get_a_pos(), addHandle->get_b_pos());
279  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
280  range_type(0, nrows), unsortedNumeric);
281 
282  // build column map for graphSym
283  Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
284  (col_map_sym, domain_map, globalColindsSym);
285 
286  // convert indices of local graphSym to LocalOrdinal
287  auto lclColmapSym = col_map_sym->getLocalMap();
288  colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
289  ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal, col_inds_array, global_col_inds_array, typename map_type::local_map_type> convertSym(colindsSym, globalColindsSym, lclColmapSym);
290  Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
291 
292  } else {
293 
294  // sum graph and graphT in LocalOrdinal
295  KKH_LO handle;
296  handle.create_spadd_handle(sorted);
297  auto addHandle = handle.get_spadd_handle();
298 
299  KokkosSparse::Experimental::spadd_symbolic
300  (&handle,
301 #if KOKKOSKERNELS_VERSION >= 40299
302  nrows, graph->getGlobalNumCols(),
303 #endif
304  rowptrs, colinds, rowptrsT, colindsT, rowptrsSym);
305  colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
306 
307  if (sorted) {
308  SortedNumericIndicesOnlyFunctor<
309  size_t, LocalOrdinal,
310  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
311  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
312  sortedNumeric(rowptrs, rowptrsT, rowptrsSym,
313  colinds, colindsT, colindsSym);
314  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
315  range_type(0, nrows), sortedNumeric);
316 
317  } else {
318  UnsortedNumericIndicesOnlyFunctor<
319  size_t, LocalOrdinal,
320  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
321  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
322  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
323  colinds, colindsT, colindsSym,
324  addHandle->get_a_pos(), addHandle->get_b_pos());
325  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
326  range_type(0, nrows), unsortedNumeric);
327  }
328 
329  // column map for graphSym is graph's column map
330  col_map_sym = col_map;
331  importer = graph->getImporter();
332  }
333 
334  bool sort = true;
335  if (sort)
336  KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
337 
338  local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
339 
340  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
341  if(!sort) {
342  graphParams = rcp(new Teuchos::ParameterList);
343  graphParams->set("sorted", false);
344  }
345 
346  return rcp (new crs_graph_type (lclGraphSym,
347  row_map,
348  col_map_sym,
349  domain_map,
350  range_map,
351  importer,
352  Teuchos::null,
353  graphParams));
354  }
355 
356  template<class LocalOrdinal,
357  class GlobalOrdinal,
358  class Node>
359  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
361  createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
362  {
363  using Teuchos::RCP;
364  // Do the local transpose
365  RCP<crs_graph_type> transGraphWithSharedRows = createTransposeLocal (params);
366 
367 #ifdef HAVE_TPETRA_MMM_TIMINGS
368  const std::string prefix = std::string ("Tpetra ") + label_ + ": ";
369  using Teuchos::TimeMonitor;
370  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose TAFC"));
371 #endif
372 
373  // If transGraphWithSharedRows has an exporter, that's what we
374  // want. If it doesn't, the rows aren't actually shared, and we're
375  // done!
376  using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
377  RCP<const export_type> exporter =
378  transGraphWithSharedRows->getExporter ();
379  if (exporter.is_null ()) {
380  return transGraphWithSharedRows;
381  }
382  else {
383  Teuchos::ParameterList labelList;
384 #ifdef HAVE_TPETRA_MMM_TIMINGS
385  labelList.set("Timer Label", label_);
386 #endif
387  if(! params.is_null ()) {
388  const char paramName[] = "compute global constants";
389  labelList.set (paramName, params->get (paramName, true));
390  }
391  // Use the Export object to do a fused Export and fillComplete.
392  // This always sorts the local graph after communication, so
393  // no need to set "sorted = false" in parameters.
394  return exportAndFillCompleteCrsGraph<crs_graph_type>
395  (transGraphWithSharedRows, *exporter, Teuchos::null,
396  Teuchos::null, Teuchos::rcpFromRef (labelList));
397  }
398  }
399 
400  template<class LocalOrdinal,
401  class GlobalOrdinal,
402  class Node>
403  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
405  createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList> &params)
406  {
407  using Teuchos::RCP;
408  using Teuchos::rcp;
409  using Teuchos::rcp_dynamic_cast;
410  using LO = LocalOrdinal;
411  using GO = GlobalOrdinal;
412  using import_type = Tpetra::Import<LO, GO, Node>;
413  using export_type = Tpetra::Export<LO, GO, Node>;
414 
415 #ifdef HAVE_TPETRA_MMM_TIMINGS
416  std::string prefix = std::string("Tpetra ") + label_ + ": ";
417  using Teuchos::TimeMonitor;
418  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose Local"));
419 #endif
420 
421  const bool sort = [&] () {
422  constexpr bool sortDefault = true; // see #4607 discussion
423  const char sortParamName[] = "sort";
424  return params.get () == nullptr ? sortDefault :
425  params->get (sortParamName, sortDefault);
426  } ();
427 
428  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
429  local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice ();
430 
431  //Allocate views and call the other version of transpose_graph
432  using c_rowmap_t = typename local_graph_device_type::row_map_type;
433  using c_entries_t = typename local_graph_device_type::entries_type;
434  using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
435  using entries_t = typename local_graph_device_type::entries_type::non_const_type;
436  LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
437  rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
438  entries_t lclGraphT_entries(
439  Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
440  KokkosSparse::Impl::transpose_graph<
441  c_rowmap_t, c_entries_t,
442  rowmap_t, entries_t,
443  rowmap_t, typename local_graph_device_type::execution_space>(
444  lclGraph.numRows(), numCols,
445  lclGraph.row_map, lclGraph.entries,
446  lclGraphT_rowmap, lclGraphT_entries);
447 
448  if (sort)
449  KokkosSparse::sort_crs_graph<
450  typename local_graph_device_type::execution_space,
451  rowmap_t, entries_t>(
452  lclGraphT_rowmap,
453  lclGraphT_entries);
454 
455  //And construct the transpose local_graph_device_type
456  local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
457 
458  // Prebuild the importers and exporters the no-communication way,
459  // flipping the importers and exporters around.
460  const auto origExport = origGraph_->getExporter ();
461  RCP<const import_type> myImport = origExport.is_null () ?
462  Teuchos::null : rcp (new import_type (*origExport));
463  const auto origImport = origGraph_->getImporter ();
464  RCP<const export_type> myExport = origImport.is_null () ?
465  Teuchos::null : rcp (new export_type (*origImport));
466 
467  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
468  if(!sort) {
469  graphParams = rcp(new Teuchos::ParameterList);
470  graphParams->set("sorted", false);
471  }
472 
473  return rcp (new crs_graph_type (lclGraphT,
474  origGraph_->getColMap (),
475  origGraph_->getRowMap (),
476  origGraph_->getRangeMap (),
477  origGraph_->getDomainMap (),
478  myImport, myExport, graphParams));
479  }
480 
481  //
482  // Explicit instantiation macro
483  //
484  // Must be expanded from within the Tpetra namespace!
485  //
486 
487 #define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO,GO,NODE) \
488  template class CrsGraphTransposer< LO , GO , NODE >;
489 
490 } // namespace Tpetra
491 
492 #endif
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.