MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 
16 #include <Xpetra_Vector.hpp>
17 
19 
20 #include "MueLu_LWGraph.hpp"
21 #include "MueLu_Aggregates.hpp"
22 #include "MueLu_Exceptions.hpp"
23 #include "MueLu_Monitor.hpp"
24 
25 #include "Kokkos_Sort.hpp"
26 
27 namespace MueLu {
28 
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
30 void AggregationPhase2aAlgorithm<LocalOrdinal, GlobalOrdinal, Node>::SetupPhase(const ParameterList& params, Teuchos::RCP<const Teuchos::Comm<int>>& comm, LO& numLocalNodes, LO& numNonAggregatedNodes) {
31  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
32  if (matchMLbehavior) {
33  // Note: ML uses global counts to set the factor
34  // Passing # of nonaggregated nodes and # of nodes via aggStat
35  GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)numLocalNodes};
36  GO out_data[2];
37  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, in_data, out_data);
38  GO phase_one_aggregated = out_data[1] - out_data[0];
39  factorMLOverride_ = as<double>(phase_one_aggregated) / (out_data[1] + 1);
40  }
41 }
42 
43 template <class LocalOrdinal, class GlobalOrdinal, class Node>
45  Monitor m(*this, "BuildAggregatesNonKokkos");
46 
47  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
48  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
49  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
50 
51  const LO numRows = graph.GetNodeNumVertices();
52  const int myRank = graph.GetComm()->getRank();
53 
54  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
55  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
56 
57  LO numLocalAggregates = aggregates.GetNumAggregates();
58 
59  LO numLocalNodes = procWinner.size();
60  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
61 
62  const double aggFactor = params.get<double>("aggregation: phase2a agg factor");
63  double factor;
64 
65  if (matchMLbehavior) {
66  // Computed in SetupPhase.
67  // This needs to be precomputed in case some ranks are done after phase1.
68  factor = factorMLOverride_;
69 
70  LO agg_stat_unaggregated = 0;
71  LO agg_stat_aggregated = 0;
72  LO agg_stat_bdry = 0;
73  for (LO i = 0; i < (LO)aggStat.size(); i++) {
74  if (aggStat[i] == AGGREGATED)
75  agg_stat_aggregated++;
76  else if (aggStat[i] == BOUNDARY)
77  agg_stat_bdry++;
78  else
79  agg_stat_unaggregated++;
80  }
81 
82  // NOTE: ML always uses 3 as minNodesPerAggregate
83  minNodesPerAggregate = 3;
84 
85  } else {
86  // MueLu defaults to using local counts to set the factor
87  factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
88  }
89  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<double>::magnitude(factor)), Exceptions::RuntimeError, "Phase2a factor needs to be finite.");
90 
91  // Now apply aggFactor
92  factor = pow(factor, aggFactor);
93 
94  int aggIndex = -1;
95  size_t aggSize = 0;
96  std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
97 
98  for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
99  if (aggStat[rootCandidate] != READY) {
100  continue;
101  }
102 
103  LO numNeighbors = 0;
104  aggSize = 0;
105  if (matchMLbehavior) {
106  aggList[aggSize++] = rootCandidate;
107  numNeighbors++;
108  }
109 
110  auto neighOfINode = graph.getNeighborVertices(rootCandidate);
111 
112  LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
113  for (int j = 0; j < neighOfINode.length; j++) {
114  LO neigh = neighOfINode(j);
115  if (graph.isLocalNeighborVertex(neigh))
116  num_local_neighbors++;
117 
118  if (neigh != rootCandidate) {
119  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
120  // If aggregate size does not exceed max size, add node to the tentative aggregate
121  // NOTE: We do not exit the loop over all neighbours since we have still
122  // to count all aggregated neighbour nodes for the aggregation criteria
123  // NOTE: We check here for the maximum aggregation size. If we would do it below
124  // with all the other check too big aggregates would not be accepted at all.
125  if (aggSize < as<size_t>(maxNodesPerAggregate))
126  aggList[aggSize++] = neigh;
127  num_nonaggd_neighbors++;
128  }
129 
130  numNeighbors++;
131  }
132  }
133 
134  bool accept_aggregate;
135  if (matchMLbehavior) {
136  // ML does this calculation slightly differently than MueLu does by default, specifically it
137  // uses the *local* number of neigbors, regardless of what they are.
138  // NOTE: ML does zero compression here. Not sure if it matters
139  // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. This has been set above
140  LO rowi_N = num_local_neighbors;
141  num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor
142  accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
143  } else {
144  accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
145  }
146 
147  if (accept_aggregate) {
148  // Accept new aggregate
149  // rootCandidate becomes the root of the newly formed aggregate
150  aggregates.SetIsRoot(rootCandidate);
151  aggIndex = numLocalAggregates++;
152 
153  for (size_t k = 0; k < aggSize; k++) {
154  aggStat[aggList[k]] = AGGREGATED;
155  vertex2AggId[aggList[k]] = aggIndex;
156  procWinner[aggList[k]] = myRank;
157  }
158 
159  numNonAggregatedNodes -= aggSize;
160  }
161  }
162 
163  // update aggregate object
164  aggregates.SetNumAggregates(numLocalAggregates);
165 }
166 
167 template <class LocalOrdinal, class GlobalOrdinal, class Node>
170  const LWGraph_kokkos& graph,
171  Aggregates& aggregates,
173  LO& numNonAggregatedNodes) const {
174  if (params.get<bool>("aggregation: deterministic")) {
175  Monitor m(*this, "BuildAggregatesDeterministic");
176  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
177  } else {
178  Monitor m(*this, "BuildAggregatesRandom");
179  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
180  }
181 
182 } // BuildAggregates
183 
184 template <class LO, class GO, class Node>
187  const LWGraph_kokkos& graph,
188  Aggregates& aggregates,
190  LO& numNonAggregatedNodes) const {
191  using device_type = typename LWGraph_kokkos::device_type;
192  using execution_space = typename LWGraph_kokkos::execution_space;
193 
194  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
195  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
196  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
197 
198  const LO numRows = graph.GetNodeNumVertices();
199  const int myRank = graph.GetComm()->getRank();
200 
201  auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
202  auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Xpetra::Access::ReadWrite);
203  auto colors = aggregates.GetGraphColors();
204  const LO numColors = aggregates.GetGraphNumColors();
205 
206  auto lclLWGraph = graph;
207 
208  LO numLocalNodes = numRows;
209  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
210 
211  const double aggFactor = 0.5;
212  double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
213  factor = pow(factor, aggFactor);
214 
215  // LBV on Sept 12, 2019: this looks a little heavy handed,
216  // I'm not sure a view is needed to perform atomic updates.
217  // If we can avoid this and use a simple LO that would be
218  // simpler for later maintenance.
219  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
220  typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
221  Kokkos::create_mirror_view(numLocalAggregates);
222  h_numLocalAggregates() = aggregates.GetNumAggregates();
223  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
224 
225  // Now we create new aggregates using root nodes in all colors other than the first color,
226  // as the first color was already exhausted in Phase 1.
227  for (int color = 2; color < numColors + 1; ++color) {
228  LO tmpNumNonAggregatedNodes = 0;
229  Kokkos::parallel_reduce(
230  "Aggregation Phase 2a: loop over each individual color",
231  Kokkos::RangePolicy<execution_space>(0, numRows),
232  KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
233  if (aggStat(rootCandidate) == READY &&
234  colors(rootCandidate) == color) {
235  LO numNeighbors = 0;
236  LO aggSize = 0;
237  if (matchMLbehavior) {
238  aggSize += 1;
239  numNeighbors += 1;
240  }
241 
242  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
243 
244  // Loop over neighbors to count how many nodes could join
245  // the new aggregate
246 
247  for (int j = 0; j < neighbors.length; ++j) {
248  LO neigh = neighbors(j);
249  if (neigh != rootCandidate) {
250  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
251  (aggStat(neigh) == READY) &&
252  (aggSize < maxNodesPerAggregate)) {
253  ++aggSize;
254  }
255  ++numNeighbors;
256  }
257  }
258 
259  // If a sufficient number of nodes can join the new aggregate
260  // then we actually create the aggregate.
261  if (aggSize > minNodesPerAggregate &&
262  (aggSize > factor * numNeighbors)) {
263  // aggregates.SetIsRoot(rootCandidate);
264  LO aggIndex = Kokkos::
265  atomic_fetch_add(&numLocalAggregates(), 1);
266 
267  LO numAggregated = 0;
268 
269  if (matchMLbehavior) {
270  // Add the root.
271  aggStat(rootCandidate) = AGGREGATED;
272  vertex2AggId(rootCandidate, 0) = aggIndex;
273  procWinner(rootCandidate, 0) = myRank;
274  ++numAggregated;
275  --lNumNonAggregatedNodes;
276  }
277 
278  for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
279  LO neigh = neighbors(neighIdx);
280  if (neigh != rootCandidate) {
281  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
282  (aggStat(neigh) == READY) &&
283  (numAggregated < aggSize)) {
284  aggStat(neigh) = AGGREGATED;
285  vertex2AggId(neigh, 0) = aggIndex;
286  procWinner(neigh, 0) = myRank;
287 
288  ++numAggregated;
289  --lNumNonAggregatedNodes;
290  }
291  }
292  }
293  }
294  }
295  },
296  tmpNumNonAggregatedNodes);
297  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
298  }
299 
300  // update aggregate object
301  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
302  aggregates.SetNumAggregates(h_numLocalAggregates());
303 } // BuildAggregatesRandom
304 
305 template <class LO, class GO, class Node>
308  const LWGraph_kokkos& graph,
309  Aggregates& aggregates,
311  LO& numNonAggregatedNodes) const {
312  using device_type = typename LWGraph_kokkos::device_type;
313  using execution_space = typename LWGraph_kokkos::execution_space;
314 
315  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
316  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
317 
318  const LO numRows = graph.GetNodeNumVertices();
319  const int myRank = graph.GetComm()->getRank();
320 
321  auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
322  auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Xpetra::Access::ReadWrite);
323  auto colors = aggregates.GetGraphColors();
324  const LO numColors = aggregates.GetGraphNumColors();
325 
326  auto lclLWGraph = graph;
327 
328  LO numLocalNodes = procWinner.size();
329  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
330 
331  const double aggFactor = 0.5;
332  double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
333  factor = pow(factor, aggFactor);
334 
335  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
336  typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
337  Kokkos::create_mirror_view(numLocalAggregates);
338  h_numLocalAggregates() = aggregates.GetNumAggregates();
339  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
340 
341  // Now we create new aggregates using root nodes in all colors other than the first color,
342  // as the first color was already exhausted in Phase 1.
343  //
344  // In the deterministic version, exactly the same set of aggregates will be created
345  // (as the nondeterministic version)
346  // because no vertex V can be a neighbor of two vertices of the same color, so two root
347  // candidates can't fight over V
348  //
349  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
350  // roots of each color before assigning aggregate IDs
351 
352  // numNonAggregatedNodes is the best available upper bound for the number of aggregates
353  // which may be created in this phase, so use it for the size of newRoots
354  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
355  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
356  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
357  for (int color = 1; color < numColors + 1; ++color) {
358  h_numNewRoots() = 0;
359  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
360  Kokkos::parallel_for(
361  "Aggregation Phase 2a: determining new roots of current color",
362  Kokkos::RangePolicy<execution_space>(0, numRows),
363  KOKKOS_LAMBDA(const LO rootCandidate) {
364  if (aggStat(rootCandidate) == READY &&
365  colors(rootCandidate) == color) {
366  LO aggSize = 0;
367  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
368  // Loop over neighbors to count how many nodes could join
369  // the new aggregate
370  LO numNeighbors = 0;
371  for (int j = 0; j < neighbors.length; ++j) {
372  LO neigh = neighbors(j);
373  if (neigh != rootCandidate) {
374  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
375  aggStat(neigh) == READY &&
376  aggSize < maxNodesPerAggregate) {
377  ++aggSize;
378  }
379  ++numNeighbors;
380  }
381  }
382  // If a sufficient number of nodes can join the new aggregate
383  // then we mark rootCandidate as a future root.
384  if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
385  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
386  newRoots(newRootIndex) = rootCandidate;
387  }
388  }
389  });
390  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
391 
392  if (h_numNewRoots() > 0) {
393  // sort the new root indices
394  Kokkos::sort(newRoots, 0, h_numNewRoots());
395  // now, loop over all new roots again and actually create the aggregates
396  LO tmpNumNonAggregatedNodes = 0;
397  // First, just find the set of color vertices which will become aggregate roots
398  Kokkos::parallel_reduce(
399  "Aggregation Phase 2a: create new aggregates",
400  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401  KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
402  LO root = newRoots(newRootIndex);
403  LO newAggID = numLocalAggregates() + newRootIndex;
404  auto neighbors = lclLWGraph.getNeighborVertices(root);
405  // Loop over neighbors and add them to new aggregate
406  aggStat(root) = AGGREGATED;
407  vertex2AggId(root, 0) = newAggID;
408  LO aggSize = 1;
409  for (int j = 0; j < neighbors.length; ++j) {
410  LO neigh = neighbors(j);
411  if (neigh != root) {
412  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
413  aggStat(neigh) == READY &&
414  aggSize < maxNodesPerAggregate) {
415  aggStat(neigh) = AGGREGATED;
416  vertex2AggId(neigh, 0) = newAggID;
417  procWinner(neigh, 0) = myRank;
418  aggSize++;
419  }
420  }
421  }
422  lNumNonAggregatedNodes -= aggSize;
423  },
424  tmpNumNonAggregatedNodes);
425  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
426  h_numLocalAggregates() += h_numNewRoots();
427  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
428  }
429  }
430  aggregates.SetNumAggregates(h_numLocalAggregates());
431 }
432 
433 } // namespace MueLu
434 
435 #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
size_type size() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void SetupPhase(const ParameterList &params, Teuchos::RCP< const Teuchos::Comm< int >> &comm, LO &numLocalNodes, LO &numNonAggregatedNodes) override
Local aggregation.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Exception throws to report errors in the internal logical of the program.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.