MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_Aggregates_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATES_DEF_HPP
11 #define MUELU_AGGREGATES_DEF_HPP
12 
13 #include <Xpetra_Map.hpp>
14 #include <Xpetra_Vector.hpp>
15 #include <Xpetra_MultiVectorFactory.hpp>
16 #include <Xpetra_VectorFactory.hpp>
17 
18 #include "MueLu_LWGraph_kokkos.hpp"
19 
20 #include "MueLu_LWGraph.hpp"
21 #include "MueLu_Utilities_decl.hpp"
23 
24 namespace MueLu {
25 
26 template <class LocalOrdinal, class GlobalOrdinal, class Node>
28  numAggregates_ = 0;
29  numGlobalAggregates_ = 0;
30 
31  vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
32  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
33 
34  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
35  procWinner_->putScalar(MUELU_UNASSIGNED);
36 
37  isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
38 
39  // slow but safe, force TentativePFactory to build column map for P itself
40  aggregatesIncludeGhosts_ = true;
41 }
42 
43 template <class LocalOrdinal, class GlobalOrdinal, class Node>
46  numAggregates_ = 0;
47  numGlobalAggregates_ = 0;
48 
49  vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
50  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
51 
52  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
53  procWinner_->putScalar(MUELU_UNASSIGNED);
54 
55  isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
56 
57  // slow but safe, force TentativePFactory to build column map for P itself
58  aggregatesIncludeGhosts_ = true;
59 }
60 
61 template <class LocalOrdinal, class GlobalOrdinal, class Node>
64  numAggregates_ = 0;
65  numGlobalAggregates_ = 0;
66 
67  vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
68  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
69 
70  procWinner_ = LOVectorFactory::Build(map);
71  procWinner_->putScalar(MUELU_UNASSIGNED);
72 
73  isRoot_ = Teuchos::ArrayRCP<bool>(map->getLocalNumElements(), false);
74 
75  // slow but safe, force TentativePFactory to build column map for P itself
76  aggregatesIncludeGhosts_ = true;
77 }
78 
79 template <class LocalOrdinal, class GlobalOrdinal, class Node>
82  if (aggregateSizes_.size() && !forceRecompute) {
83  return aggregateSizes_;
84 
85  } else {
86  // It is necessary to initialize this to 0
87  aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
88 
89  int myPID = GetMap()->getComm()->getRank();
90 
91  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
92  auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);
93 
94  typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
95  Kokkos::parallel_for(
96  "MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0, procWinner.size()),
97  KOKKOS_LAMBDA(const LO i) {
98  if (procWinner(i, 0) == myPID)
99  aggregateSizesAtomic(vertex2AggId(i, 0))++;
100  });
101 
102  aggregateSizes_ = aggregateSizes;
103 
104  return aggregateSizes;
105  }
106 }
107 
108 template <class LocalOrdinal, class GlobalOrdinal, class Node>
111  ComputeAggregateSizesArrayRCP(bool forceRecompute) const {
112  auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute);
113 
114  // if this is the first time this is called, setup the host mirror and fill it
115  if (!aggregateSizesHost_.is_allocated()) {
116  aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes);
117  Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
118  } else {
119  // otherwise, only update if we forced a recompute
120  if (forceRecompute)
121  Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
122  }
123 
124  // put the data in an ArrayRCP, but do not give it ownership of the data
125  Teuchos::ArrayRCP<LocalOrdinal> aggregateSizesArrayRCP(aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false);
126 
127  return aggregateSizesArrayRCP;
128 }
129 
130 template <class LocalOrdinal, class GlobalOrdinal, class Node>
133  using row_map_type = typename local_graph_type::row_map_type;
134  using entries_type = typename local_graph_type::entries_type;
135  using size_type = typename local_graph_type::size_type;
136 
137  auto numAggregates = numAggregates_;
138 
139  if (static_cast<LO>(graph_.numRows()) == numAggregates)
140  return graph_;
141 
142  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
143  auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);
144  auto sizes = ComputeAggregateSizes();
145 
146  // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
147  typename row_map_type::non_const_type rows("Agg_rows", numAggregates + 1); // rows(0) = 0 automatically
148 
149  // parallel_scan (exclusive)
150  Kokkos::parallel_scan(
151  "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
152  KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
153  update += sizes(i);
154  if (final_pass)
155  rows(i + 1) = update;
156  });
157 
158  decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates + 1); // +1 is just for ease
159  Kokkos::deep_copy(offsets, rows);
160 
161  int myPID = GetMap()->getComm()->getRank();
162 
163  size_type numNNZ;
164  {
165  Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
166  typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
167  Kokkos::deep_copy(numNNZ_host, numNNZ_device);
168  numNNZ = numNNZ_host();
169  }
170  typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
171  size_t realnnz = 0;
172  Kokkos::parallel_reduce(
173  "MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
174  KOKKOS_LAMBDA(const LO i, size_t& nnz) {
175  if (procWinner(i, 0) == myPID) {
176  typedef typename std::remove_reference<decltype(offsets(0))>::type atomic_incr_type;
177  auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1));
178  cols(idx) = i;
179  nnz++;
180  }
181  },
182  realnnz);
184  "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");
185 
186  graph_ = local_graph_type(cols, rows);
187 
188  return graph_;
189 }
190 
191 template <class LocalOrdinal, class GlobalOrdinal, class Node>
193  LO numAggs = GetNumAggregates();
194  LO numNodes = vertex2AggId_->getLocalLength();
195  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
196  typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true);
198 
199  aggPtr = LO_view("aggPtr", numAggs + 1);
200  aggNodes = LO_view("aggNodes", numNodes);
201  LO_view aggCurr("agg curr", numAggs + 1);
202 
203  // Construct the "rowptr" and the counter
204  Kokkos::parallel_scan(
205  "MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0, numAggs + 1),
206  KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) {
207  LO count = 0;
208  if (aggIdx < numAggs)
209  count = aggSizes(aggIdx);
210  if (final_pass) {
211  aggPtr(aggIdx) = aggOffset;
212  aggCurr(aggIdx) = aggOffset;
213  if (aggIdx == numAggs)
214  aggCurr(numAggs) = 0; // use this for counting unaggregated nodes
215  }
216  aggOffset += count;
217  });
218 
219  // Preallocate unaggregated to the correct size
220  LO numUnaggregated = 0;
221  Kokkos::parallel_reduce(
222  "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0, numNodes),
223  KOKKOS_LAMBDA(const LO nodeIdx, LO& count) {
224  if (vertex2AggId(nodeIdx, 0) == INVALID)
225  count++;
226  },
227  numUnaggregated);
228  unaggregated = LO_view("unaggregated", numUnaggregated);
229 
230  // Stick the nodes in each aggregate's spot
231  Kokkos::parallel_for(
232  "MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0, numNodes),
233  KOKKOS_LAMBDA(const LO nodeIdx) {
234  LO aggIdx = vertex2AggId(nodeIdx, 0);
235  if (aggIdx != INVALID) {
236  // atomic postincrement aggCurr(aggIdx) each time
237  aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx;
238  } else {
239  // same, but using last entry of aggCurr for unaggregated nodes
240  unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx;
241  }
242  });
243 }
244 
245 template <class LocalOrdinal, class GlobalOrdinal, class Node>
247  if (numGlobalAggregates_ == -1)
248  return BaseClass::description() + "{nGlobalAggregates = not computed}";
249  else
250  return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}";
251 }
252 
253 template <class LocalOrdinal, class GlobalOrdinal, class Node>
256 
257  if (verbLevel & Statistics1) {
258  if (numGlobalAggregates_ == -1)
259  out0 << "Global number of aggregates: not computed " << std::endl;
260  else
261  out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl;
262  }
263 }
264 
265 template <class LocalOrdinal, class GlobalOrdinal, class Node>
267  if (numGlobalAggregates_ != -1) {
268  LO nAggregates = GetNumAggregates();
269  GO nGlobalAggregates;
270  MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
271  SetNumGlobalAggregates(nGlobalAggregates);
272  }
273  return numGlobalAggregates_;
274 }
275 
276 template <class LocalOrdinal, class GlobalOrdinal, class Node>
279  return vertex2AggId_->getMap();
280 }
281 
282 } // namespace MueLu
283 
284 #endif // MUELU_AGGREGATES_DEF_HPP
Kokkos::View< LocalOrdinal *, device_type > aggregates_sizes_type
#define MUELU_UNASSIGNED
#define MueLu_sumAll(rcpComm, in, out)
Lightweight MueLu representation of a compressed row storage graph.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
Container class for aggregation information.
GlobalOrdinal GO
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
const RCP< const Map > GetMap() const
returns (overlapping) map of aggregate/node distribution
LocalOrdinal LO
MueLu::DefaultGlobalOrdinal GlobalOrdinal
typename LWGraph_kokkos::local_graph_type local_graph_type
#define MUELU_UNAGGREGATED
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
GO GetNumGlobalAggregatesComputeIfNeeded()
Get global number of aggregates.
Teuchos::ArrayRCP< LocalOrdinal > ComputeAggregateSizesArrayRCP(bool forceRecompute=false) const
Compute sizes of aggregates.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
Kokkos::View< local_ordinal_type *, device_type > LO_view
Lightweight MueLu representation of a compressed row storage graph.
Aggregates(const LWGraph &graph)
Standard constructor for Aggregates structure.
Exception throws to report errors in the internal logical of the program.
void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
std::string description() const
Return a simple one-line description of this object.
local_graph_type GetGraph() const
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
virtual std::string description() const
Return a simple one-line description of this object.