Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsGraphTransposer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
11 #define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
12 
13 #include "Tpetra_CrsGraph.hpp"
14 #include "Tpetra_Export.hpp"
16 #include "Tpetra_Details_makeColMap.hpp"
18 #include "Teuchos_ParameterList.hpp"
19 #include "Teuchos_TimeMonitor.hpp"
20 #include "KokkosSparse_Utils.hpp"
21 #include "KokkosKernels_Handle.hpp"
22 #include "KokkosSparse_spadd.hpp"
23 
24 namespace Tpetra {
25 
26  template<typename GO,
27  typename LocalIndicesType,
28  typename GlobalIndicesType,
29  typename ColMapType>
30  struct ConvertLocalToGlobalFunctor
31  {
32  ConvertLocalToGlobalFunctor(
33  const LocalIndicesType& colindsOrig_,
34  const GlobalIndicesType& colindsConverted_,
35  const ColMapType& colmap_) :
36  colindsOrig (colindsOrig_),
37  colindsConverted (colindsConverted_),
38  colmap (colmap_)
39  {}
40  KOKKOS_INLINE_FUNCTION void
41  operator() (const GO i) const
42  {
43  colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
44  }
45  LocalIndicesType colindsOrig;
46  GlobalIndicesType colindsConverted;
47  ColMapType colmap;
48  };
49 
50  template<class LO, class GO, class LOView, class GOView, class LocalMap>
51  struct ConvertGlobalToLocalFunctor
52  {
53  ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
54  : lids(lids_), gids(gids_), localColMap(localColMap_)
55  {}
56 
57  KOKKOS_FUNCTION void operator() (const GO i) const
58  {
59  lids(i) = localColMap.getLocalElement(gids(i));
60  }
61 
62  LOView lids;
63  const GOView gids;
64  const LocalMap localColMap;
65  };
66 
67 
68  template <typename size_type, typename ordinal_type,
69  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
70  typename AcolindsT, typename BcolindsT, typename CcolindsT>
71  struct SortedNumericIndicesOnlyFunctor {
72 
73  SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
74  const BrowptrsT& Browptrs_,
75  const CrowptrsT& Crowptrs_,
76  const AcolindsT& Acolinds_,
77  const BcolindsT& Bcolinds_,
78  const CcolindsT& Ccolinds_)
79  : Arowptrs(Arowptrs_),
80  Browptrs(Browptrs_),
81  Crowptrs(Crowptrs_),
82  Acolinds(Acolinds_),
83  Bcolinds(Bcolinds_),
84  Ccolinds(Ccolinds_) {}
85 
86  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const
87  {
88  const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
89 
90  // count the union of nonzeros in Arow and Brow
91  size_type ai = 0;
92  size_type bi = 0;
93  size_type Arowstart = Arowptrs(i);
94  size_type Arowlen = Arowptrs(i + 1) - Arowstart;
95  size_type Browstart = Browptrs(i);
96  size_type Browlen = Browptrs(i + 1) - Browstart;
97  ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
98  ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
99  size_type Coffset = Crowptrs(i);
100  while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX)
101  {
102  ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
103  while(Acol == Ccol)
104  {
105  ai++;
106  if(ai == Arowlen)
107  Acol = ORDINAL_MAX;
108  else
109  Acol = Acolinds(Arowstart + ai);
110  }
111  while(Bcol == Ccol)
112  {
113  bi++;
114  if(bi == Browlen)
115  Bcol = ORDINAL_MAX;
116  else
117  Bcol = Bcolinds(Browstart + bi);
118  }
119  Ccolinds(Coffset) = Ccol;
120  Coffset++;
121  }
122  }
123 
124  const ArowptrsT Arowptrs;
125  const BrowptrsT Browptrs;
126  const CrowptrsT Crowptrs;
127  const AcolindsT Acolinds;
128  const BcolindsT Bcolinds;
129  CcolindsT Ccolinds;
130  };
131 
132  template <typename size_type, typename ordinal_type,
133  typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
134  typename AcolindsT, typename BcolindsT, typename CcolindsT>
135  struct UnsortedNumericIndicesOnlyFunctor {
136 
137  UnsortedNumericIndicesOnlyFunctor(
138  const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
139  const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
140  const CcolindsT Apos_, const CcolindsT Bpos_)
141  : Arowptrs(Arowptrs_),
142  Browptrs(Browptrs_),
143  Crowptrs(Crowptrs_),
144  Acolinds(Acolinds_),
145  Bcolinds(Bcolinds_),
146  Ccolinds(Ccolinds_),
147  Apos(Apos_),
148  Bpos(Bpos_) {}
149 
150  KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
151  size_type CrowStart = Crowptrs(i);
152  size_type ArowStart = Arowptrs(i);
153  size_type ArowEnd = Arowptrs(i + 1);
154  size_type BrowStart = Browptrs(i);
155  size_type BrowEnd = Browptrs(i + 1);
156  // add in A entries, while setting C colinds
157  for (size_type j = ArowStart; j < ArowEnd; j++) {
158  Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
159  }
160  // add in B entries, while setting C colinds
161  for (size_type j = BrowStart; j < BrowEnd; j++) {
162  Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
163  }
164  }
165  const ArowptrsT Arowptrs;
166  const BrowptrsT Browptrs;
167  const CrowptrsT Crowptrs;
168  const AcolindsT Acolinds;
169  const BcolindsT Bcolinds;
170  CcolindsT Ccolinds;
171  const CcolindsT Apos;
172  const CcolindsT Bpos;
173  };
174 
175 
176  template<class LocalOrdinal,
177  class GlobalOrdinal,
178  class Node>
180  CrsGraphTransposer (const Teuchos::RCP<const crs_graph_type>& origGraph,
181  const std::string& label)
182  : origGraph_ (origGraph), label_ (label)
183  {}
184 
185  template<class LocalOrdinal,
186  class GlobalOrdinal,
187  class Node>
188  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
190  symmetrize (const Teuchos::RCP<Teuchos::ParameterList> &params)
191  {
192  using Teuchos::RCP;
193  using device_type = typename Node::device_type;
194  using execution_space = typename device_type::execution_space;
195  using range_type = Kokkos::RangePolicy<execution_space, size_t>;
196  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
197  using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
198  using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type ;
199  using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
200  using local_map_type = typename map_type::local_map_type;
201  using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
202 
203  auto graph = origGraph_;
204  auto domain_map = graph->getDomainMap();
205  auto range_map = graph->getRangeMap();
206  auto row_map = graph->getRowMap();
207  auto col_map = graph->getColMap();
208  RCP<const map_type> col_map_sym;
209  RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
210 
211  TEUCHOS_ASSERT(domain_map->isSameAs(*range_map));
212  TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
213 
214  // Do the transpose
215  RCP<crs_graph_type> graphT = createTranspose (params);
216 
217  auto col_map_T = graphT->getColMap();
218  TEUCHOS_ASSERT(!col_map_T.is_null());
219  TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
220 
221  bool graphSorted = graph->isSorted();
222  bool graphTSorted = graphT->isSorted();
223  bool sorted = graphSorted && graphTSorted;
224  bool matchingColMaps = col_map->isSameAs(*col_map_T);
225 
226  auto lclGraph = graph->getLocalGraphDevice();
227  auto lclGraphT = graphT->getLocalGraphDevice();
228 
229  using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
230  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
231  using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
232  typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
233 
234  auto rowptrs = lclGraph.row_map;
235  auto rowptrsT = lclGraphT.row_map;
236  auto colinds = lclGraph.entries;
237  auto colindsT = lclGraphT.entries;
238 
239  auto nrows = rowptrs.extent(0) - 1;
240  auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
241 
242  col_inds_array colindsSym;
243 
244  if(!matchingColMaps) {
245  // convert indices of local graph to GlobalOrdinal
246  auto lclColmap = col_map->getLocalMap();
247  global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
248  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convert(colinds, colindsConverted, lclColmap);
249  Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
250 
251  // convert indices of local graphT to GlobalOrdinal
252  auto lclColmapT = col_map_T->getLocalMap();
253  global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
254  ConvertLocalToGlobalFunctor<GlobalOrdinal, col_inds_array, global_col_inds_array, local_map_type> convertT(colindsT, colindsTConverted, lclColmapT);
255  Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
256 
257  // sum graph and graphT in GlobalOrdinal
258  KKH_GO handle;
259  handle.create_spadd_handle(false);
260  auto addHandle = handle.get_spadd_handle();
261 
262  global_col_inds_array globalColindsSym;
263 
264  KokkosSparse::spadd_symbolic
265  (&handle,
266  nrows, graph->getGlobalNumCols(),
267  rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
268  globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
269 
270  UnsortedNumericIndicesOnlyFunctor<
271  size_t, GlobalOrdinal,
272  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
273  typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
274  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
275  colindsConverted, colindsTConverted, globalColindsSym,
276  addHandle->get_a_pos(), addHandle->get_b_pos());
277  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
278  range_type(0, nrows), unsortedNumeric);
279 
280  // build column map for graphSym
281  Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
282  (col_map_sym, domain_map, globalColindsSym);
283 
284  // convert indices of local graphSym to LocalOrdinal
285  auto lclColmapSym = col_map_sym->getLocalMap();
286  colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
287  ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal, col_inds_array, global_col_inds_array, typename map_type::local_map_type> convertSym(colindsSym, globalColindsSym, lclColmapSym);
288  Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
289 
290  } else {
291 
292  // sum graph and graphT in LocalOrdinal
293  KKH_LO handle;
294  handle.create_spadd_handle(sorted);
295  auto addHandle = handle.get_spadd_handle();
296 
297  KokkosSparse::spadd_symbolic
298  (&handle,
299  nrows, graph->getGlobalNumCols(),
300  rowptrs, colinds, rowptrsT, colindsT, rowptrsSym);
301  colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
302 
303  if (sorted) {
304  SortedNumericIndicesOnlyFunctor<
305  size_t, LocalOrdinal,
306  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
307  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
308  sortedNumeric(rowptrs, rowptrsT, rowptrsSym,
309  colinds, colindsT, colindsSym);
310  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
311  range_type(0, nrows), sortedNumeric);
312 
313  } else {
314  UnsortedNumericIndicesOnlyFunctor<
315  size_t, LocalOrdinal,
316  typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
317  typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
318  unsortedNumeric(rowptrs, rowptrsT, rowptrsSym,
319  colinds, colindsT, colindsSym,
320  addHandle->get_a_pos(), addHandle->get_b_pos());
321  Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
322  range_type(0, nrows), unsortedNumeric);
323  }
324 
325  // column map for graphSym is graph's column map
326  col_map_sym = col_map;
327  importer = graph->getImporter();
328  }
329 
330  bool sort = true;
331  if (sort)
332  KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
333 
334  local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
335 
336  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
337  if(!sort) {
338  graphParams = rcp(new Teuchos::ParameterList);
339  graphParams->set("sorted", false);
340  }
341 
342  return rcp (new crs_graph_type (lclGraphSym,
343  row_map,
344  col_map_sym,
345  domain_map,
346  range_map,
347  importer,
348  Teuchos::null,
349  graphParams));
350  }
351 
352  template<class LocalOrdinal,
353  class GlobalOrdinal,
354  class Node>
355  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
357  createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
358  {
359  using Teuchos::RCP;
360  // Do the local transpose
361  RCP<crs_graph_type> transGraphWithSharedRows = createTransposeLocal (params);
362 
363 #ifdef HAVE_TPETRA_MMM_TIMINGS
364  const std::string prefix = std::string ("Tpetra ") + label_ + ": ";
365  using Teuchos::TimeMonitor;
366  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose TAFC"));
367 #endif
368 
369  // If transGraphWithSharedRows has an exporter, that's what we
370  // want. If it doesn't, the rows aren't actually shared, and we're
371  // done!
372  using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
373  RCP<const export_type> exporter =
374  transGraphWithSharedRows->getExporter ();
375  if (exporter.is_null ()) {
376  return transGraphWithSharedRows;
377  }
378  else {
379  Teuchos::ParameterList labelList;
380 #ifdef HAVE_TPETRA_MMM_TIMINGS
381  labelList.set("Timer Label", label_);
382 #endif
383  if(! params.is_null ()) {
384  const char paramName[] = "compute global constants";
385  labelList.set (paramName, params->get (paramName, true));
386  }
387  // Use the Export object to do a fused Export and fillComplete.
388  // This always sorts the local graph after communication, so
389  // no need to set "sorted = false" in parameters.
390  return exportAndFillCompleteCrsGraph<crs_graph_type>
391  (transGraphWithSharedRows, *exporter, Teuchos::null,
392  Teuchos::null, Teuchos::rcpFromRef (labelList));
393  }
394  }
395 
396  template<class LocalOrdinal,
397  class GlobalOrdinal,
398  class Node>
399  Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
401  createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList> &params)
402  {
403  using Teuchos::RCP;
404  using Teuchos::rcp;
405  using Teuchos::rcp_dynamic_cast;
406  using LO = LocalOrdinal;
407  using GO = GlobalOrdinal;
408  using import_type = Tpetra::Import<LO, GO, Node>;
409  using export_type = Tpetra::Export<LO, GO, Node>;
410 
411 #ifdef HAVE_TPETRA_MMM_TIMINGS
412  std::string prefix = std::string("Tpetra ") + label_ + ": ";
413  using Teuchos::TimeMonitor;
414  TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose Local"));
415 #endif
416 
417  const bool sort = [&] () {
418  constexpr bool sortDefault = true; // see #4607 discussion
419  const char sortParamName[] = "sort";
420  return params.get () == nullptr ? sortDefault :
421  params->get (sortParamName, sortDefault);
422  } ();
423 
424  using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
425  local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice ();
426 
427  //Allocate views and call the other version of transpose_graph
428  using c_rowmap_t = typename local_graph_device_type::row_map_type;
429  using c_entries_t = typename local_graph_device_type::entries_type;
430  using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
431  using entries_t = typename local_graph_device_type::entries_type::non_const_type;
432  LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
433  rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
434  entries_t lclGraphT_entries(
435  Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
436  KokkosSparse::Impl::transpose_graph<
437  c_rowmap_t, c_entries_t,
438  rowmap_t, entries_t,
439  rowmap_t, typename local_graph_device_type::execution_space>(
440  lclGraph.numRows(), numCols,
441  lclGraph.row_map, lclGraph.entries,
442  lclGraphT_rowmap, lclGraphT_entries);
443 
444  if (sort)
445  KokkosSparse::sort_crs_graph<
446  typename local_graph_device_type::execution_space,
447  rowmap_t, entries_t>(
448  lclGraphT_rowmap,
449  lclGraphT_entries);
450 
451  //And construct the transpose local_graph_device_type
452  local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
453 
454  // Prebuild the importers and exporters the no-communication way,
455  // flipping the importers and exporters around.
456  const auto origExport = origGraph_->getExporter ();
457  RCP<const import_type> myImport = origExport.is_null () ?
458  Teuchos::null : rcp (new import_type (*origExport));
459  const auto origImport = origGraph_->getImporter ();
460  RCP<const export_type> myExport = origImport.is_null () ?
461  Teuchos::null : rcp (new export_type (*origImport));
462 
463  RCP<Teuchos::ParameterList> graphParams = Teuchos::null;
464  if(!sort) {
465  graphParams = rcp(new Teuchos::ParameterList);
466  graphParams->set("sorted", false);
467  }
468 
469  return rcp (new crs_graph_type (lclGraphT,
470  origGraph_->getColMap (),
471  origGraph_->getRowMap (),
472  origGraph_->getRangeMap (),
473  origGraph_->getDomainMap (),
474  myImport, myExport, graphParams));
475  }
476 
477  //
478  // Explicit instantiation macro
479  //
480  // Must be expanded from within the Tpetra namespace!
481  //
482 
483 #define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO,GO,NODE) \
484  template class CrsGraphTransposer< LO , GO , NODE >;
485 
486 } // namespace Tpetra
487 
488 #endif
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
&quot;Local&quot; part of Map suitable for Kokkos kernels.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.