MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 
16 #include <Xpetra_Vector.hpp>
17 
19 
20 #include "MueLu_LWGraph.hpp"
21 #include "MueLu_Aggregates.hpp"
22 #include "MueLu_Exceptions.hpp"
23 #include "MueLu_Monitor.hpp"
24 
25 #include "Kokkos_Sort.hpp"
26 
27 namespace MueLu {
28 
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
31  Monitor m(*this, "BuildAggregatesNonKokkos");
32 
33  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
34  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
35  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
36 
37  const LO numRows = graph.GetNodeNumVertices();
38  const int myRank = graph.GetComm()->getRank();
39 
40  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
41  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
42 
43  LO numLocalAggregates = aggregates.GetNumAggregates();
44 
45  LO numLocalNodes = procWinner.size();
46  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
47 
48  const double aggFactor = params.get<double>("aggregation: phase2a agg factor");
49  double factor;
50 
51  if (matchMLbehavior) {
52  // Note: ML uses global counts to set the factor
53  // Passing # of nonaggregated nodes and # of nodes via aggStat
54  GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()};
55  GO out_data[2];
56  Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data);
57  GO phase_one_aggregated = out_data[1] - out_data[0];
58  factor = as<double>(phase_one_aggregated) / (out_data[1] + 1);
59 
60  LO agg_stat_unaggregated = 0;
61  LO agg_stat_aggregated = 0;
62  LO agg_stat_bdry = 0;
63  for (LO i = 0; i < (LO)aggStat.size(); i++) {
64  if (aggStat[i] == AGGREGATED)
65  agg_stat_aggregated++;
66  else if (aggStat[i] == BOUNDARY)
67  agg_stat_bdry++;
68  else
69  agg_stat_unaggregated++;
70  }
71 
72  // NOTE: ML always uses 3 as minNodesPerAggregate
73  minNodesPerAggregate = 3;
74 
75  } else {
76  // MueLu defaults to using local counts to set the factor
77  factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
78  }
79 
80  // Now apply aggFactor
81  factor = pow(factor, aggFactor);
82 
83  int aggIndex = -1;
84  size_t aggSize = 0;
85  std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
86 
87  for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
88  if (aggStat[rootCandidate] != READY) {
89  continue;
90  }
91 
92  LO numNeighbors = 0;
93  aggSize = 0;
94  if (matchMLbehavior) {
95  aggList[aggSize++] = rootCandidate;
96  numNeighbors++;
97  }
98 
99  auto neighOfINode = graph.getNeighborVertices(rootCandidate);
100 
101  LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
102  for (int j = 0; j < neighOfINode.length; j++) {
103  LO neigh = neighOfINode(j);
104  if (graph.isLocalNeighborVertex(neigh))
105  num_local_neighbors++;
106 
107  if (neigh != rootCandidate) {
108  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
109  // If aggregate size does not exceed max size, add node to the tentative aggregate
110  // NOTE: We do not exit the loop over all neighbours since we have still
111  // to count all aggregated neighbour nodes for the aggregation criteria
112  // NOTE: We check here for the maximum aggregation size. If we would do it below
113  // with all the other check too big aggregates would not be accepted at all.
114  if (aggSize < as<size_t>(maxNodesPerAggregate))
115  aggList[aggSize++] = neigh;
116  num_nonaggd_neighbors++;
117  }
118 
119  numNeighbors++;
120  }
121  }
122 
123  bool accept_aggregate;
124  if (matchMLbehavior) {
125  // ML does this calculation slightly differently than MueLu does by default, specifically it
126  // uses the *local* number of neigbors, regardless of what they are.
127  // NOTE: ML does zero compression here. Not sure if it matters
128  // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. This has been set above
129  LO rowi_N = num_local_neighbors;
130  num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor
131  accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
132  } else {
133  accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
134  }
135 
136  if (accept_aggregate) {
137  // Accept new aggregate
138  // rootCandidate becomes the root of the newly formed aggregate
139  aggregates.SetIsRoot(rootCandidate);
140  aggIndex = numLocalAggregates++;
141 
142  for (size_t k = 0; k < aggSize; k++) {
143  aggStat[aggList[k]] = AGGREGATED;
144  vertex2AggId[aggList[k]] = aggIndex;
145  procWinner[aggList[k]] = myRank;
146  }
147 
148  numNonAggregatedNodes -= aggSize;
149  }
150  }
151 
152  // update aggregate object
153  aggregates.SetNumAggregates(numLocalAggregates);
154 }
155 
156 template <class LocalOrdinal, class GlobalOrdinal, class Node>
159  const LWGraph_kokkos& graph,
160  Aggregates& aggregates,
162  LO& numNonAggregatedNodes) const {
163  if (params.get<bool>("aggregation: deterministic")) {
164  Monitor m(*this, "BuildAggregatesDeterministic");
165  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
166  } else {
167  Monitor m(*this, "BuildAggregatesRandom");
168  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
169  }
170 
171 } // BuildAggregates
172 
173 template <class LO, class GO, class Node>
176  const LWGraph_kokkos& graph,
177  Aggregates& aggregates,
179  LO& numNonAggregatedNodes) const {
180  using device_type = typename LWGraph_kokkos::device_type;
181  using execution_space = typename LWGraph_kokkos::execution_space;
182 
183  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
184  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
185  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
186 
187  const LO numRows = graph.GetNodeNumVertices();
188  const int myRank = graph.GetComm()->getRank();
189 
190  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
191  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
192  auto colors = aggregates.GetGraphColors();
193  const LO numColors = aggregates.GetGraphNumColors();
194 
195  auto lclLWGraph = graph;
196 
197  LO numLocalNodes = numRows;
198  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
199 
200  const double aggFactor = 0.5;
201  double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
202  factor = pow(factor, aggFactor);
203 
204  // LBV on Sept 12, 2019: this looks a little heavy handed,
205  // I'm not sure a view is needed to perform atomic updates.
206  // If we can avoid this and use a simple LO that would be
207  // simpler for later maintenance.
208  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
209  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
210  Kokkos::create_mirror_view(numLocalAggregates);
211  h_numLocalAggregates() = aggregates.GetNumAggregates();
212  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
213 
214  // Now we create new aggregates using root nodes in all colors other than the first color,
215  // as the first color was already exhausted in Phase 1.
216  for (int color = 2; color < numColors + 1; ++color) {
217  LO tmpNumNonAggregatedNodes = 0;
218  Kokkos::parallel_reduce(
219  "Aggregation Phase 2a: loop over each individual color",
220  Kokkos::RangePolicy<execution_space>(0, numRows),
221  KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
222  if (aggStat(rootCandidate) == READY &&
223  colors(rootCandidate) == color) {
224  LO numNeighbors = 0;
225  LO aggSize = 0;
226  if (matchMLbehavior) {
227  aggSize += 1;
228  numNeighbors += 1;
229  }
230 
231  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
232 
233  // Loop over neighbors to count how many nodes could join
234  // the new aggregate
235 
236  for (int j = 0; j < neighbors.length; ++j) {
237  LO neigh = neighbors(j);
238  if (neigh != rootCandidate) {
239  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
240  (aggStat(neigh) == READY) &&
241  (aggSize < maxNodesPerAggregate)) {
242  ++aggSize;
243  }
244  ++numNeighbors;
245  }
246  }
247 
248  // If a sufficient number of nodes can join the new aggregate
249  // then we actually create the aggregate.
250  if (aggSize > minNodesPerAggregate &&
251  (aggSize > factor * numNeighbors)) {
252  // aggregates.SetIsRoot(rootCandidate);
253  LO aggIndex = Kokkos::
254  atomic_fetch_add(&numLocalAggregates(), 1);
255 
256  LO numAggregated = 0;
257 
258  if (matchMLbehavior) {
259  // Add the root.
260  aggStat(rootCandidate) = AGGREGATED;
261  vertex2AggId(rootCandidate, 0) = aggIndex;
262  procWinner(rootCandidate, 0) = myRank;
263  ++numAggregated;
264  --lNumNonAggregatedNodes;
265  }
266 
267  for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
268  LO neigh = neighbors(neighIdx);
269  if (neigh != rootCandidate) {
270  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
271  (aggStat(neigh) == READY) &&
272  (numAggregated < aggSize)) {
273  aggStat(neigh) = AGGREGATED;
274  vertex2AggId(neigh, 0) = aggIndex;
275  procWinner(neigh, 0) = myRank;
276 
277  ++numAggregated;
278  --lNumNonAggregatedNodes;
279  }
280  }
281  }
282  }
283  }
284  },
285  tmpNumNonAggregatedNodes);
286  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
287  }
288 
289  // update aggregate object
290  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
291  aggregates.SetNumAggregates(h_numLocalAggregates());
292 } // BuildAggregatesRandom
293 
294 template <class LO, class GO, class Node>
297  const LWGraph_kokkos& graph,
298  Aggregates& aggregates,
300  LO& numNonAggregatedNodes) const {
301  using device_type = typename LWGraph_kokkos::device_type;
302  using execution_space = typename LWGraph_kokkos::execution_space;
303 
304  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
305  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
306 
307  const LO numRows = graph.GetNodeNumVertices();
308  const int myRank = graph.GetComm()->getRank();
309 
310  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
311  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
312  auto colors = aggregates.GetGraphColors();
313  const LO numColors = aggregates.GetGraphNumColors();
314 
315  auto lclLWGraph = graph;
316 
317  LO numLocalNodes = procWinner.size();
318  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
319 
320  const double aggFactor = 0.5;
321  double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
322  factor = pow(factor, aggFactor);
323 
324  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
325  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
326  Kokkos::create_mirror_view(numLocalAggregates);
327  h_numLocalAggregates() = aggregates.GetNumAggregates();
328  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
329 
330  // Now we create new aggregates using root nodes in all colors other than the first color,
331  // as the first color was already exhausted in Phase 1.
332  //
333  // In the deterministic version, exactly the same set of aggregates will be created
334  // (as the nondeterministic version)
335  // because no vertex V can be a neighbor of two vertices of the same color, so two root
336  // candidates can't fight over V
337  //
338  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
339  // roots of each color before assigning aggregate IDs
340 
341  // numNonAggregatedNodes is the best available upper bound for the number of aggregates
342  // which may be created in this phase, so use it for the size of newRoots
343  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
344  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
345  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
346  for (int color = 1; color < numColors + 1; ++color) {
347  h_numNewRoots() = 0;
348  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
349  Kokkos::parallel_for(
350  "Aggregation Phase 2a: determining new roots of current color",
351  Kokkos::RangePolicy<execution_space>(0, numRows),
352  KOKKOS_LAMBDA(const LO rootCandidate) {
353  if (aggStat(rootCandidate) == READY &&
354  colors(rootCandidate) == color) {
355  LO aggSize = 0;
356  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
357  // Loop over neighbors to count how many nodes could join
358  // the new aggregate
359  LO numNeighbors = 0;
360  for (int j = 0; j < neighbors.length; ++j) {
361  LO neigh = neighbors(j);
362  if (neigh != rootCandidate) {
363  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
364  aggStat(neigh) == READY &&
365  aggSize < maxNodesPerAggregate) {
366  ++aggSize;
367  }
368  ++numNeighbors;
369  }
370  }
371  // If a sufficient number of nodes can join the new aggregate
372  // then we mark rootCandidate as a future root.
373  if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
374  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
375  newRoots(newRootIndex) = rootCandidate;
376  }
377  }
378  });
379  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
380 
381  if (h_numNewRoots() > 0) {
382  // sort the new root indices
383  Kokkos::sort(newRoots, 0, h_numNewRoots());
384  // now, loop over all new roots again and actually create the aggregates
385  LO tmpNumNonAggregatedNodes = 0;
386  // First, just find the set of color vertices which will become aggregate roots
387  Kokkos::parallel_reduce(
388  "Aggregation Phase 2a: create new aggregates",
389  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
390  KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
391  LO root = newRoots(newRootIndex);
392  LO newAggID = numLocalAggregates() + newRootIndex;
393  auto neighbors = lclLWGraph.getNeighborVertices(root);
394  // Loop over neighbors and add them to new aggregate
395  aggStat(root) = AGGREGATED;
396  vertex2AggId(root, 0) = newAggID;
397  LO aggSize = 1;
398  for (int j = 0; j < neighbors.length; ++j) {
399  LO neigh = neighbors(j);
400  if (neigh != root) {
401  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
402  aggStat(neigh) == READY &&
403  aggSize < maxNodesPerAggregate) {
404  aggStat(neigh) = AGGREGATED;
405  vertex2AggId(neigh, 0) = newAggID;
406  procWinner(neigh, 0) = myRank;
407  aggSize++;
408  }
409  }
410  }
411  lNumNonAggregatedNodes -= aggSize;
412  },
413  tmpNumNonAggregatedNodes);
414  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
415  h_numLocalAggregates() += h_numNewRoots();
416  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
417  }
418  }
419  aggregates.SetNumAggregates(h_numLocalAggregates());
420 }
421 
422 } // namespace MueLu
423 
424 #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
size_type size() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.