MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 
16 #include <Xpetra_Vector.hpp>
17 
19 
20 #include "MueLu_Aggregates.hpp"
21 #include "MueLu_Exceptions.hpp"
22 #include "MueLu_LWGraph.hpp"
23 #include "MueLu_Monitor.hpp"
24 
25 namespace MueLu {
26 
27 // Try to stick unaggregated nodes into a neighboring aggregate if they are
28 // not already too big
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
31  Monitor m(*this, "BuildAggregatesNonKokkos");
32  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
33 
34  const LO numRows = graph.GetNodeNumVertices();
35  const int myRank = graph.GetComm()->getRank();
36 
37  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
38  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
39 
40  LO numLocalAggregates = aggregates.GetNumAggregates();
41 
42  const LO defaultConnectWeight = 100;
43  const LO penaltyConnectWeight = 10;
44 
45  std::vector<LO> aggWeight(numLocalAggregates, 0);
46  std::vector<LO> connectWeight(numRows, defaultConnectWeight);
47  std::vector<LO> aggPenalties(numRows, 0);
48 
49  // We do this cycle twice.
50  // I don't know why, but ML does it too
51  // taw: by running the aggregation routine more than once there is a chance that also
52  // non-aggregated nodes with a node distance of two are added to existing aggregates.
53  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
54  // should be sufficient.
55  for (int k = 0; k < 2; k++) {
56  for (LO i = 0; i < numRows; i++) {
57  if (aggStat[i] != READY)
58  continue;
59 
60  auto neighOfINode = graph.getNeighborVertices(i);
61 
62  for (int j = 0; j < neighOfINode.length; j++) {
63  LO neigh = neighOfINode(j);
64 
65  // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
66  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
67  aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
68  }
69 
70  int bestScore = -100000;
71  int bestAggId = -1;
72  int bestConnect = -1;
73 
74  for (int j = 0; j < neighOfINode.length; j++) {
75  LO neigh = neighOfINode(j);
76  int aggId = vertex2AggId[neigh];
77 
78  // Note: The third condition is only relevant if the ML matching is enabled
79  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
80  int score = aggWeight[aggId] - aggPenalties[aggId];
81 
82  if (score > bestScore) {
83  bestAggId = aggId;
84  bestScore = score;
85  bestConnect = connectWeight[neigh];
86 
87  } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88  bestConnect = connectWeight[neigh];
89  }
90 
91  // Reset the weights for the next loop
92  aggWeight[aggId] = 0;
93  }
94  }
95 
96  if (bestScore >= 0) {
97  aggStat[i] = AGGREGATED;
98  vertex2AggId[i] = bestAggId;
99  procWinner[i] = myRank;
100 
101  numNonAggregatedNodes--;
102 
103  aggPenalties[bestAggId]++;
104  connectWeight[i] = bestConnect - penaltyConnectWeight;
105  }
106  }
107  }
108 }
109 
110 // Try to stick unaggregated nodes into a neighboring aggregate if they are
111 // not already too big
112 template <class LocalOrdinal, class GlobalOrdinal, class Node>
115  const LWGraph_kokkos& graph,
116  Aggregates& aggregates,
118  LO& numNonAggregatedNodes) const {
119  if (params.get<bool>("aggregation: deterministic")) {
120  Monitor m(*this, "BuildAggregatesDeterministic");
121  BuildAggregates<true>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
122  } else {
123  Monitor m(*this, "BuildAggregatesRandom");
124  BuildAggregates<false>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
125  }
126 
127 } // BuildAggregates
128 
129 template <class AggStatType, class ProcWinnerType, class Vertex2AggType, class ColorsType, class LocalGraphType, class AggPenaltyType, class LO, bool deterministic, bool matchMLbehavior>
131  private:
132  AggStatType aggStat;
133  ProcWinnerType procWinner;
134  Vertex2AggType vertex2AggId;
135  ColorsType colors;
136  LocalGraphType lclLWGraph;
137  AggPenaltyType aggPenalties;
138  AggPenaltyType aggPenaltyUpdates;
139  AggPenaltyType connectWeight;
143 
144  public:
145  ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
146  : aggStat(aggStat_)
147  , procWinner(procWinner_)
148  , vertex2AggId(vertex2AggId_)
149  , colors(colors_)
150  , lclLWGraph(lclLWGraph_)
151  , aggPenalties(aggPenalties_)
152  , aggPenaltyUpdates(aggPenaltyUpdates_)
153  , connectWeight(connectWeight_)
154  , penaltyConnectWeight(penaltyConnectWeight_)
155  , color(color_)
156  , myRank(rank_) {}
157 
158  ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
159  : aggStat(aggStat_)
160  , procWinner(procWinner_)
161  , vertex2AggId(vertex2AggId_)
162  , colors(colors_)
163  , lclLWGraph(lclLWGraph_)
164  , aggPenalties(aggPenalties_)
165  , connectWeight(connectWeight_)
166  , penaltyConnectWeight(penaltyConnectWeight_)
167  , color(color_)
168  , myRank(rank_) {}
169 
170  KOKKOS_INLINE_FUNCTION
171  void operator()(const LO& i, LO& tmpNumAggregated) const {
172  if (aggStat(i) != READY || colors(i) != color)
173  return;
174 
175  int bestScore = -100000;
176  int bestAggId = -1;
177  int bestConnect = -1;
178 
179  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
180 
181  for (int j = 0; j < neighOfINode.length; j++) {
182  LO neigh = neighOfINode(j);
183 
184  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
185  (aggStat(neigh) == AGGREGATED)) {
186  auto aggId = vertex2AggId(neigh, 0);
187  LO aggWeight = 0;
188  for (int k = 0; k < neighOfINode.length; k++) {
189  LO neigh2 = neighOfINode(k);
190  if (lclLWGraph.isLocalNeighborVertex(neigh2) &&
191  (aggStat(neigh2) == AGGREGATED) &&
192  (vertex2AggId(neigh2, 0) == aggId))
193  aggWeight += connectWeight(neigh2);
194  }
195 
196  if (matchMLbehavior && (aggWeight == 0))
197  return;
198 
199  int score = aggWeight - aggPenalties(aggId);
200 
201  if (score > bestScore) {
202  bestAggId = aggId;
203  bestScore = score;
204  bestConnect = connectWeight(neigh);
205 
206  } else if (aggId == bestAggId &&
207  connectWeight(neigh) > bestConnect) {
208  bestConnect = connectWeight(neigh);
209  }
210  }
211  }
212  if (bestScore >= 0) {
213  aggStat(i) = AGGREGATED;
214  vertex2AggId(i, 0) = bestAggId;
215  procWinner(i, 0) = myRank;
216 
217  if constexpr (deterministic) {
218  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
219  } else {
220  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
221  }
222  connectWeight(i) = bestConnect - penaltyConnectWeight;
223  tmpNumAggregated++;
224  }
225  }
226 };
227 
228 template <class LocalOrdinal, class GlobalOrdinal, class Node>
229 template <bool deterministic>
232  const LWGraph_kokkos graph,
233  Aggregates& aggregates,
235  LO& numNonAggregatedNodes) const {
236  using device_type = typename LWGraph_kokkos::device_type;
237  using execution_space = typename LWGraph_kokkos::execution_space;
238 
239  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
240 
241  const LO numRows = graph.GetNodeNumVertices();
242  const int myRank = graph.GetComm()->getRank();
243 
244  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
245  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
246  auto colors = aggregates.GetGraphColors();
247  const LO numColors = aggregates.GetGraphNumColors();
248  const LO numLocalAggregates = aggregates.GetNumAggregates();
249 
250  const LO defaultConnectWeight = 100;
251  const LO penaltyConnectWeight = 10;
252 
253  Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
254  Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
255  Kokkos::View<LO*, device_type> aggPenaltyUpdates;
256  // if constexpr (deterministic)
257  aggPenaltyUpdates = Kokkos::View<LO*, device_type>("aggPenaltyUpdates", numLocalAggregates);
258 
259  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
260 
261  // taw: by running the aggregation routine more than once there is a chance that also
262  // non-aggregated nodes with a node distance of two are added to existing aggregates.
263  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
264  // should be sufficient.
265  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
266  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
267  // is needed to reach distance 2 neighbors.
268  int maxIters = 2;
269  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
270  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
271  maxIters = 1;
272  }
273  for (int iter = 0; iter < maxIters; ++iter) {
274  for (LO color = 1; color <= numColors; ++color) {
275  // the reduce counts how many nodes are aggregated by this phase,
276  // which will then be subtracted from numNonAggregatedNodes
277  LO numAggregated = 0;
278 
279  if constexpr (deterministic) {
280  if (matchMLbehavior) {
281  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
282 
283  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
284  Kokkos::RangePolicy<execution_space>(0, numRows),
285  functor,
286  numAggregated);
287  } else {
288  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
289 
290  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
291  Kokkos::RangePolicy<execution_space>(0, numRows),
292  functor,
293  numAggregated);
294  }
295  } else {
296  if (matchMLbehavior) {
297  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
298 
299  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
300  Kokkos::RangePolicy<execution_space>(0, numRows),
301  functor,
302  numAggregated);
303  } else {
304  auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
305 
306  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
307  Kokkos::RangePolicy<execution_space>(0, numRows),
308  functor,
309  numAggregated);
310  }
311  }
312 
313  if constexpr (deterministic) {
314  Kokkos::parallel_for(
315  "Aggregation Phase 2b: updating agg penalties",
316  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
317  KOKKOS_LAMBDA(const LO agg) {
318  aggPenalties(agg) += aggPenaltyUpdates(agg);
319  aggPenaltyUpdates(agg) = 0;
320  });
321  }
322 
323  numNonAggregatedNodes -= numAggregated;
324  }
325  } // loop over maxIters
326 
327 } // BuildAggregates
328 
329 } // namespace MueLu
330 
331 #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &aggPenaltyUpdates_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION void operator()(const LO &i, LO &tmpNumAggregated) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
TransListIter iter
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType