MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 
16 #include <Xpetra_Vector.hpp>
17 
19 
20 #include "MueLu_Aggregates.hpp"
21 #include "MueLu_Exceptions.hpp"
22 #include "MueLu_LWGraph.hpp"
23 #include "MueLu_Monitor.hpp"
24 
25 namespace MueLu {
26 
27 // Try to stick unaggregated nodes into a neighboring aggregate if they are
28 // not already too big
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
31  Monitor m(*this, "BuildAggregatesNonKokkos");
32  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
33 
34  const LO numRows = graph.GetNodeNumVertices();
35  const int myRank = graph.GetComm()->getRank();
36 
37  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
38  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
39 
40  LO numLocalAggregates = aggregates.GetNumAggregates();
41 
42  const int defaultConnectWeight = 100;
43  const int penaltyConnectWeight = 10;
44 
45  std::vector<int> aggWeight(numLocalAggregates, 0);
46  std::vector<int> connectWeight(numRows, defaultConnectWeight);
47  std::vector<int> aggPenalties(numRows, 0);
48 
49  // We do this cycle twice.
50  // I don't know why, but ML does it too
51  // taw: by running the aggregation routine more than once there is a chance that also
52  // non-aggregated nodes with a node distance of two are added to existing aggregates.
53  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
54  // should be sufficient.
55  for (int k = 0; k < 2; k++) {
56  for (LO i = 0; i < numRows; i++) {
57  if (aggStat[i] != READY)
58  continue;
59 
60  auto neighOfINode = graph.getNeighborVertices(i);
61 
62  for (int j = 0; j < neighOfINode.length; j++) {
63  LO neigh = neighOfINode(j);
64 
65  // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
66  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
67  aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
68  }
69 
70  int bestScore = -100000;
71  int bestAggId = -1;
72  int bestConnect = -1;
73 
74  for (int j = 0; j < neighOfINode.length; j++) {
75  LO neigh = neighOfINode(j);
76  int aggId = vertex2AggId[neigh];
77 
78  // Note: The third condition is only relevant if the ML matching is enabled
79  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
80  int score = aggWeight[aggId] - aggPenalties[aggId];
81 
82  if (score > bestScore) {
83  bestAggId = aggId;
84  bestScore = score;
85  bestConnect = connectWeight[neigh];
86 
87  } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88  bestConnect = connectWeight[neigh];
89  }
90 
91  // Reset the weights for the next loop
92  aggWeight[aggId] = 0;
93  }
94  }
95 
96  if (bestScore >= 0) {
97  aggStat[i] = AGGREGATED;
98  vertex2AggId[i] = bestAggId;
99  procWinner[i] = myRank;
100 
101  numNonAggregatedNodes--;
102 
103  aggPenalties[bestAggId]++;
104  connectWeight[i] = bestConnect - penaltyConnectWeight;
105  }
106  }
107  }
108 }
109 
110 // Try to stick unaggregated nodes into a neighboring aggregate if they are
111 // not already too big
112 template <class LocalOrdinal, class GlobalOrdinal, class Node>
115  const LWGraph_kokkos& graph,
116  Aggregates& aggregates,
118  LO& numNonAggregatedNodes) const {
119  if (params.get<bool>("aggregation: deterministic")) {
120  Monitor m(*this, "BuildAggregatesDeterministic");
121  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
122  } else {
123  Monitor m(*this, "BuildAggregatesRandom");
124  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
125  }
126 
127 } // BuildAggregates
128 
129 template <class LO, class GO, class Node>
132  const LWGraph_kokkos& graph,
133  Aggregates& aggregates,
135  LO& numNonAggregatedNodes) const {
136  using device_type = typename LWGraph_kokkos::device_type;
137  using execution_space = typename LWGraph_kokkos::execution_space;
138 
139  const LO numRows = graph.GetNodeNumVertices();
140  const int myRank = graph.GetComm()->getRank();
141 
142  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
143  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
144  auto colors = aggregates.GetGraphColors();
145  const LO numColors = aggregates.GetGraphNumColors();
146  const LO numLocalAggregates = aggregates.GetNumAggregates();
147 
148  auto lclLWGraph = graph;
149 
150  const LO defaultConnectWeight = 100;
151  const LO penaltyConnectWeight = 10;
152 
153  Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
154  Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
155  Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
156 
157  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
158 
159  // taw: by running the aggregation routine more than once there is a chance that also
160  // non-aggregated nodes with a node distance of two are added to existing aggregates.
161  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
162  // should be sufficient.
163  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
164  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
165  // is needed to reach distance 2 neighbors.
166  int maxIters = 2;
167  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
168  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
169  maxIters = 1;
170  }
171  for (int iter = 0; iter < maxIters; ++iter) {
172  for (LO color = 1; color <= numColors; ++color) {
173  Kokkos::deep_copy(aggWeight, 0);
174 
175  // the reduce counts how many nodes are aggregated by this phase,
176  // which will then be subtracted from numNonAggregatedNodes
177  LO numAggregated = 0;
178  Kokkos::parallel_reduce(
179  "Aggregation Phase 2b: aggregates expansion",
180  Kokkos::RangePolicy<execution_space>(0, numRows),
181  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
182  if (aggStat(i) != READY || colors(i) != color)
183  return;
184 
185  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
186  for (int j = 0; j < neighOfINode.length; j++) {
187  LO neigh = neighOfINode(j);
188 
189  // We don't check (neigh != i), as it is covered by checking
190  // (aggStat[neigh] == AGGREGATED)
191  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
192  aggStat(neigh) == AGGREGATED)
193  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
194  connectWeight(neigh));
195  }
196 
197  int bestScore = -100000;
198  int bestAggId = -1;
199  int bestConnect = -1;
200 
201  for (int j = 0; j < neighOfINode.length; j++) {
202  LO neigh = neighOfINode(j);
203 
204  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
205  aggStat(neigh) == AGGREGATED) {
206  auto aggId = vertex2AggId(neigh, 0);
207  int score = aggWeight(aggId) - aggPenalties(aggId);
208 
209  if (score > bestScore) {
210  bestAggId = aggId;
211  bestScore = score;
212  bestConnect = connectWeight(neigh);
213 
214  } else if (aggId == bestAggId &&
215  connectWeight(neigh) > bestConnect) {
216  bestConnect = connectWeight(neigh);
217  }
218  }
219  }
220  if (bestScore >= 0) {
221  aggStat(i) = AGGREGATED;
222  vertex2AggId(i, 0) = bestAggId;
223  procWinner(i, 0) = myRank;
224 
225  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
226  connectWeight(i) = bestConnect - penaltyConnectWeight;
227  tmpNumAggregated++;
228  }
229  },
230  numAggregated); // parallel_for
231  numNonAggregatedNodes -= numAggregated;
232  }
233  } // loop over maxIters
234 
235 } // BuildAggregatesRandom
236 
237 template <class LO, class GO, class Node>
240  const LWGraph_kokkos& graph,
241  Aggregates& aggregates,
243  LO& numNonAggregatedNodes) const {
244  using device_type = typename LWGraph_kokkos::device_type;
245  using execution_space = typename LWGraph_kokkos::execution_space;
246 
247  const LO numRows = graph.GetNodeNumVertices();
248  const int myRank = graph.GetComm()->getRank();
249 
250  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
251  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
252  auto colors = aggregates.GetGraphColors();
253  const LO numColors = aggregates.GetGraphNumColors();
254  LO numLocalAggregates = aggregates.GetNumAggregates();
255 
256  auto lclLWGraph = graph;
257 
258  const int defaultConnectWeight = 100;
259  const int penaltyConnectWeight = 10;
260 
261  Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
262  Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
263  Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
264  Kokkos::View<int*, device_type> aggPenalties("aggPenalties", numLocalAggregates);
265 
266  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
267 
268  // We do this cycle twice.
269  // I don't know why, but ML does it too
270  // taw: by running the aggregation routine more than once there is a chance that also
271  // non-aggregated nodes with a node distance of two are added to existing aggregates.
272  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
273  // should be sufficient.
274  int maxIters = 2;
275  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
276  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
277  maxIters = 1;
278  }
279  for (int iter = 0; iter < maxIters; ++iter) {
280  for (LO color = 1; color <= numColors; color++) {
281  Kokkos::deep_copy(aggWeight, 0);
282 
283  // the reduce counts how many nodes are aggregated by this phase,
284  // which will then be subtracted from numNonAggregatedNodes
285  LO numAggregated = 0;
286  Kokkos::parallel_for(
287  "Aggregation Phase 2b: updating agg weights",
288  Kokkos::RangePolicy<execution_space>(0, numRows),
289  KOKKOS_LAMBDA(const LO i) {
290  if (aggStat(i) != READY || colors(i) != color)
291  return;
292  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
293  for (int j = 0; j < neighOfINode.length; j++) {
294  LO neigh = neighOfINode(j);
295  // We don't check (neigh != i), as it is covered by checking
296  // (aggStat[neigh] == AGGREGATED)
297  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
298  aggStat(neigh) == AGGREGATED)
299  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
300  connectWeight(neigh));
301  }
302  });
303 
304  Kokkos::parallel_reduce(
305  "Aggregation Phase 2b: aggregates expansion",
306  Kokkos::RangePolicy<execution_space>(0, numRows),
307  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
308  if (aggStat(i) != READY || colors(i) != color)
309  return;
310  int bestScore = -100000;
311  int bestAggId = -1;
312  int bestConnect = -1;
313 
314  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
315  for (int j = 0; j < neighOfINode.length; j++) {
316  LO neigh = neighOfINode(j);
317 
318  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
319  aggStat(neigh) == AGGREGATED) {
320  auto aggId = vertex2AggId(neigh, 0);
321  int score = aggWeight(aggId) - aggPenalties(aggId);
322 
323  if (score > bestScore) {
324  bestAggId = aggId;
325  bestScore = score;
326  bestConnect = connectWeight(neigh);
327 
328  } else if (aggId == bestAggId &&
329  connectWeight(neigh) > bestConnect) {
330  bestConnect = connectWeight(neigh);
331  }
332  }
333  }
334  if (bestScore >= 0) {
335  aggStat(i) = AGGREGATED;
336  vertex2AggId(i, 0) = bestAggId;
337  procWinner(i, 0) = myRank;
338 
339  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
340  connectWeight(i) = bestConnect - penaltyConnectWeight;
341  tmpNumAggregated++;
342  }
343  },
344  numAggregated); // parallel_reduce
345 
346  Kokkos::parallel_for(
347  "Aggregation Phase 2b: updating agg penalties",
348  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
349  KOKKOS_LAMBDA(const LO agg) {
350  aggPenalties(agg) += aggPenaltyUpdates(agg);
351  aggPenaltyUpdates(agg) = 0;
352  });
353  numNonAggregatedNodes -= numAggregated;
354  }
355  } // loop over k
356 } // BuildAggregatesDeterministic
357 
358 } // namespace MueLu
359 
360 #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void BuildAggregatesDeterministic(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
TransListIter iter
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType