MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 
16 #include <Xpetra_Vector.hpp>
17 
19 
20 #include "MueLu_Aggregates.hpp"
21 #include "MueLu_Exceptions.hpp"
22 #include "MueLu_LWGraph.hpp"
23 #include "MueLu_Monitor.hpp"
24 
25 namespace MueLu {
26 
27 // Try to stick unaggregated nodes into a neighboring aggregate if they are
28 // not already too big
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
31  Monitor m(*this, "BuildAggregatesNonKokkos");
32  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
33 
34  const LO numRows = graph.GetNodeNumVertices();
35  const int myRank = graph.GetComm()->getRank();
36 
37  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
38  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
39 
40  LO numLocalAggregates = aggregates.GetNumAggregates();
41 
42  const LO defaultConnectWeight = 100;
43  const LO penaltyConnectWeight = 10;
44 
45  std::vector<LO> aggWeight(numLocalAggregates, 0);
46  std::vector<LO> connectWeight(numRows, defaultConnectWeight);
47  std::vector<LO> aggPenalties(numRows, 0);
48 
49  // We do this cycle twice.
50  // I don't know why, but ML does it too
51  // taw: by running the aggregation routine more than once there is a chance that also
52  // non-aggregated nodes with a node distance of two are added to existing aggregates.
53  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
54  // should be sufficient.
55  for (int k = 0; k < 2; k++) {
56  for (LO i = 0; i < numRows; i++) {
57  if (aggStat[i] != READY)
58  continue;
59 
60  auto neighOfINode = graph.getNeighborVertices(i);
61 
62  for (int j = 0; j < neighOfINode.length; j++) {
63  LO neigh = neighOfINode(j);
64 
65  // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
66  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
67  aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
68  }
69 
70  int bestScore = -100000;
71  int bestAggId = -1;
72  int bestConnect = -1;
73 
74  for (int j = 0; j < neighOfINode.length; j++) {
75  LO neigh = neighOfINode(j);
76  int aggId = vertex2AggId[neigh];
77 
78  // Note: The third condition is only relevant if the ML matching is enabled
79  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
80  int score = aggWeight[aggId] - aggPenalties[aggId];
81 
82  if (score > bestScore) {
83  bestAggId = aggId;
84  bestScore = score;
85  bestConnect = connectWeight[neigh];
86 
87  } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88  bestConnect = connectWeight[neigh];
89  }
90 
91  // Reset the weights for the next loop
92  aggWeight[aggId] = 0;
93  }
94  }
95 
96  if (bestScore >= 0) {
97  aggStat[i] = AGGREGATED;
98  vertex2AggId[i] = bestAggId;
99  procWinner[i] = myRank;
100 
101  numNonAggregatedNodes--;
102 
103  aggPenalties[bestAggId]++;
104  connectWeight[i] = bestConnect - penaltyConnectWeight;
105  }
106  }
107  }
108 }
109 
110 // Try to stick unaggregated nodes into a neighboring aggregate if they are
111 // not already too big
112 template <class LocalOrdinal, class GlobalOrdinal, class Node>
115  const LWGraph_kokkos& graph,
116  Aggregates& aggregates,
118  LO& numNonAggregatedNodes) const {
119  if (params.get<bool>("aggregation: deterministic")) {
120  Monitor m(*this, "BuildAggregatesDeterministic");
121  BuildAggregates<true>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
122  } else {
123  Monitor m(*this, "BuildAggregatesRandom");
124  BuildAggregates<false>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
125  }
126 
127 } // BuildAggregates
128 
129 template <class AggStatType, class ColorsType, class LO>
131  private:
132  AggStatType aggStat;
133  ColorsType colors;
134 
135  public:
136  using value_type = LO[];
138 
139  CountUnggregatedByColor(AggStatType& aggStat_, ColorsType& colors_, LO numColors_)
140  : aggStat(aggStat_)
141  , colors(colors_)
142  , value_count(numColors_) {}
143 
144  KOKKOS_INLINE_FUNCTION
145  void operator()(const LO i, LO counts[]) const {
146  if (aggStat(i) == READY)
147  counts[colors(i) - 1] += 1;
148  }
149 };
150 
151 template <class AggStatType, class ProcWinnerType, class Vertex2AggType, class ColorsType, class LocalGraphType, class AggPenaltyType, class LO, bool deterministic, bool matchMLbehavior>
153  private:
154  AggStatType aggStat;
155  ProcWinnerType procWinner;
156  Vertex2AggType vertex2AggId;
157  ColorsType colors;
158  LocalGraphType lclLWGraph;
159  AggPenaltyType aggPenalties;
160  AggPenaltyType aggPenaltyUpdates;
161  AggPenaltyType connectWeight;
165 
166  public:
167  ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
168  : aggStat(aggStat_)
169  , procWinner(procWinner_)
170  , vertex2AggId(vertex2AggId_)
171  , colors(colors_)
172  , lclLWGraph(lclLWGraph_)
173  , aggPenalties(aggPenalties_)
174  , aggPenaltyUpdates(aggPenaltyUpdates_)
175  , connectWeight(connectWeight_)
176  , penaltyConnectWeight(penaltyConnectWeight_)
177  , color(color_)
178  , myRank(rank_) {}
179 
180  ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
181  : aggStat(aggStat_)
182  , procWinner(procWinner_)
183  , vertex2AggId(vertex2AggId_)
184  , colors(colors_)
185  , lclLWGraph(lclLWGraph_)
186  , aggPenalties(aggPenalties_)
187  , connectWeight(connectWeight_)
188  , penaltyConnectWeight(penaltyConnectWeight_)
189  , color(color_)
190  , myRank(rank_) {}
191 
192  KOKKOS_INLINE_FUNCTION
193  void operator()(const LO& i, LO& tmpNumAggregated) const {
194  if (aggStat(i) != READY || colors(i) != color)
195  return;
196 
197  int bestScore = -100000;
198  int bestAggId = -1;
199  int bestConnect = -1;
200 
201  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
202 
203  for (int j = 0; j < neighOfINode.length; j++) {
204  LO neigh = neighOfINode(j);
205 
206  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
207  (aggStat(neigh) == AGGREGATED)) {
208  auto aggId = vertex2AggId(neigh, 0);
209  LO aggWeight = 0;
210  for (int k = 0; k < neighOfINode.length; k++) {
211  LO neigh2 = neighOfINode(k);
212  if (lclLWGraph.isLocalNeighborVertex(neigh2) &&
213  (aggStat(neigh2) == AGGREGATED) &&
214  (vertex2AggId(neigh2, 0) == aggId))
215  aggWeight += connectWeight(neigh2);
216  }
217 
218  if (matchMLbehavior && (aggWeight == 0))
219  return;
220 
221  int score = aggWeight - aggPenalties(aggId);
222 
223  if (score > bestScore) {
224  bestAggId = aggId;
225  bestScore = score;
226  bestConnect = connectWeight(neigh);
227 
228  } else if (aggId == bestAggId &&
229  connectWeight(neigh) > bestConnect) {
230  bestConnect = connectWeight(neigh);
231  }
232  }
233  }
234  if (bestScore >= 0) {
235  aggStat(i) = AGGREGATED;
236  vertex2AggId(i, 0) = bestAggId;
237  procWinner(i, 0) = myRank;
238 
239  if constexpr (deterministic) {
240  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
241  } else {
242  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
243  }
244  connectWeight(i) = bestConnect - penaltyConnectWeight;
245  tmpNumAggregated++;
246  }
247  }
248 };
249 
250 template <class LocalOrdinal, class GlobalOrdinal, class Node>
251 template <bool deterministic>
254  const LWGraph_kokkos graph,
255  Aggregates& aggregates,
257  LO& numNonAggregatedNodes) const {
258  using device_type = typename LWGraph_kokkos::device_type;
259  using execution_space = typename LWGraph_kokkos::execution_space;
260 
261  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
262 
263  const LO numRows = graph.GetNodeNumVertices();
264  const int myRank = graph.GetComm()->getRank();
265 
266  auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
267  auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Xpetra::Access::ReadWrite);
268  auto colors = aggregates.GetGraphColors();
269  const LO numColors = aggregates.GetGraphNumColors();
270  const LO numLocalAggregates = aggregates.GetNumAggregates();
271 
272  const LO defaultConnectWeight = 100;
273  const LO penaltyConnectWeight = 10;
274 
275  Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
276  Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
277  Kokkos::View<LO*, device_type> aggPenaltyUpdates;
278  // if constexpr (deterministic)
279  aggPenaltyUpdates = Kokkos::View<LO*, device_type>("aggPenaltyUpdates", numLocalAggregates);
280 
281  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
282 
283  // taw: by running the aggregation routine more than once there is a chance that also
284  // non-aggregated nodes with a node distance of two are added to existing aggregates.
285  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
286  // should be sufficient.
287  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
288  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
289  // is needed to reach distance 2 neighbors.
290  int maxIters = 2;
291  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
292  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
293  maxIters = 1;
294  }
295 
296  Kokkos::View<LO*, Kokkos::HostSpace> numUnaggregatedNodesPerColor("numUnaggregatedNodesPerColor", numColors);
297  for (int iter = 0; iter < maxIters; ++iter) {
298  {
299  auto functor = CountUnggregatedByColor<decltype(aggStat), decltype(colors), LO>(aggStat, colors, numColors);
300  Kokkos::parallel_reduce(
301  "Aggregation Phase 2b: count unaggregated nodes per color",
302  Kokkos::RangePolicy<execution_space>(0, numRows),
303  functor,
304  numUnaggregatedNodesPerColor);
305  }
306  for (LO color = 1; color <= numColors; ++color) {
307  if (numUnaggregatedNodesPerColor(color - 1) == 0)
308  continue;
309 
310  // the reduce counts how many nodes are aggregated by this phase,
311  // which will then be subtracted from numNonAggregatedNodes
312  LO numAggregated = 0;
313 
314  if constexpr (deterministic) {
315  if (matchMLbehavior) {
316  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
317 
318  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
319  Kokkos::RangePolicy<execution_space>(0, numRows),
320  functor,
321  numAggregated);
322  } else {
323  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
324 
325  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
326  Kokkos::RangePolicy<execution_space>(0, numRows),
327  functor,
328  numAggregated);
329  }
330  } else {
331  if (matchMLbehavior) {
332  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
333 
334  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
335  Kokkos::RangePolicy<execution_space>(0, numRows),
336  functor,
337  numAggregated);
338  } else {
339  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
340 
341  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
342  Kokkos::RangePolicy<execution_space>(0, numRows),
343  functor,
344  numAggregated);
345  }
346  }
347 
348  numNonAggregatedNodes -= numAggregated;
349 
350  if (numNonAggregatedNodes == 0)
351  break;
352 
353  if constexpr (deterministic) {
354  Kokkos::parallel_for(
355  "Aggregation Phase 2b: updating agg penalties",
356  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
357  KOKKOS_LAMBDA(const LO agg) {
358  aggPenalties(agg) += aggPenaltyUpdates(agg);
359  aggPenaltyUpdates(agg) = 0;
360  });
361  }
362  }
363  } // loop over maxIters
364 
365 } // BuildAggregates
366 
367 } // namespace MueLu
368 
369 #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &aggPenaltyUpdates_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
CountUnggregatedByColor(AggStatType &aggStat_, ColorsType &colors_, LO numColors_)
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION void operator()(const LO &i, LO &tmpNumAggregated) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
KOKKOS_INLINE_FUNCTION void operator()(const LO i, LO counts[]) const
TransListIter iter
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType