MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_Aggregates.hpp"
57 #include "MueLu_Exceptions.hpp"
58 #include "MueLu_LWGraph.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 namespace MueLu {
62 
63 // Try to stick unaggregated nodes into a neighboring aggregate if they are
64 // not already too big
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
67  Monitor m(*this, "BuildAggregatesNonKokkos");
68  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
69 
70  const LO numRows = graph.GetNodeNumVertices();
71  const int myRank = graph.GetComm()->getRank();
72 
73  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
74  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
75 
76  LO numLocalAggregates = aggregates.GetNumAggregates();
77 
78  const int defaultConnectWeight = 100;
79  const int penaltyConnectWeight = 10;
80 
81  std::vector<int> aggWeight(numLocalAggregates, 0);
82  std::vector<int> connectWeight(numRows, defaultConnectWeight);
83  std::vector<int> aggPenalties(numRows, 0);
84 
85  // We do this cycle twice.
86  // I don't know why, but ML does it too
87  // taw: by running the aggregation routine more than once there is a chance that also
88  // non-aggregated nodes with a node distance of two are added to existing aggregates.
89  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
90  // should be sufficient.
91  for (int k = 0; k < 2; k++) {
92  for (LO i = 0; i < numRows; i++) {
93  if (aggStat[i] != READY)
94  continue;
95 
96  auto neighOfINode = graph.getNeighborVertices(i);
97 
98  for (int j = 0; j < neighOfINode.length; j++) {
99  LO neigh = neighOfINode(j);
100 
101  // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
102  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
103  aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
104  }
105 
106  int bestScore = -100000;
107  int bestAggId = -1;
108  int bestConnect = -1;
109 
110  for (int j = 0; j < neighOfINode.length; j++) {
111  LO neigh = neighOfINode(j);
112  int aggId = vertex2AggId[neigh];
113 
114  // Note: The third condition is only relevant if the ML matching is enabled
115  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
116  int score = aggWeight[aggId] - aggPenalties[aggId];
117 
118  if (score > bestScore) {
119  bestAggId = aggId;
120  bestScore = score;
121  bestConnect = connectWeight[neigh];
122 
123  } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
124  bestConnect = connectWeight[neigh];
125  }
126 
127  // Reset the weights for the next loop
128  aggWeight[aggId] = 0;
129  }
130  }
131 
132  if (bestScore >= 0) {
133  aggStat[i] = AGGREGATED;
134  vertex2AggId[i] = bestAggId;
135  procWinner[i] = myRank;
136 
137  numNonAggregatedNodes--;
138 
139  aggPenalties[bestAggId]++;
140  connectWeight[i] = bestConnect - penaltyConnectWeight;
141  }
142  }
143  }
144 }
145 
146 // Try to stick unaggregated nodes into a neighboring aggregate if they are
147 // not already too big
148 template <class LocalOrdinal, class GlobalOrdinal, class Node>
151  const LWGraph_kokkos& graph,
152  Aggregates& aggregates,
154  LO& numNonAggregatedNodes) const {
155  if (params.get<bool>("aggregation: deterministic")) {
156  Monitor m(*this, "BuildAggregatesDeterministic");
157  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
158  } else {
159  Monitor m(*this, "BuildAggregatesRandom");
160  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
161  }
162 
163 } // BuildAggregates
164 
165 template <class LO, class GO, class Node>
168  const LWGraph_kokkos& graph,
169  Aggregates& aggregates,
171  LO& numNonAggregatedNodes) const {
172  using device_type = typename LWGraph_kokkos::device_type;
173  using execution_space = typename LWGraph_kokkos::execution_space;
174 
175  const LO numRows = graph.GetNodeNumVertices();
176  const int myRank = graph.GetComm()->getRank();
177 
178  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
179  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
180  auto colors = aggregates.GetGraphColors();
181  const LO numColors = aggregates.GetGraphNumColors();
182  const LO numLocalAggregates = aggregates.GetNumAggregates();
183 
184  auto lclLWGraph = graph;
185 
186  const LO defaultConnectWeight = 100;
187  const LO penaltyConnectWeight = 10;
188 
189  Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
190  Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
191  Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
192 
193  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
194 
195  // taw: by running the aggregation routine more than once there is a chance that also
196  // non-aggregated nodes with a node distance of two are added to existing aggregates.
197  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
198  // should be sufficient.
199  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
200  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
201  // is needed to reach distance 2 neighbors.
202  int maxIters = 2;
203  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
204  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
205  maxIters = 1;
206  }
207  for (int iter = 0; iter < maxIters; ++iter) {
208  for (LO color = 1; color <= numColors; ++color) {
209  Kokkos::deep_copy(aggWeight, 0);
210 
211  // the reduce counts how many nodes are aggregated by this phase,
212  // which will then be subtracted from numNonAggregatedNodes
213  LO numAggregated = 0;
214  Kokkos::parallel_reduce(
215  "Aggregation Phase 2b: aggregates expansion",
216  Kokkos::RangePolicy<execution_space>(0, numRows),
217  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
218  if (aggStat(i) != READY || colors(i) != color)
219  return;
220 
221  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
222  for (int j = 0; j < neighOfINode.length; j++) {
223  LO neigh = neighOfINode(j);
224 
225  // We don't check (neigh != i), as it is covered by checking
226  // (aggStat[neigh] == AGGREGATED)
227  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
228  aggStat(neigh) == AGGREGATED)
229  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
230  connectWeight(neigh));
231  }
232 
233  int bestScore = -100000;
234  int bestAggId = -1;
235  int bestConnect = -1;
236 
237  for (int j = 0; j < neighOfINode.length; j++) {
238  LO neigh = neighOfINode(j);
239 
240  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
241  aggStat(neigh) == AGGREGATED) {
242  auto aggId = vertex2AggId(neigh, 0);
243  int score = aggWeight(aggId) - aggPenalties(aggId);
244 
245  if (score > bestScore) {
246  bestAggId = aggId;
247  bestScore = score;
248  bestConnect = connectWeight(neigh);
249 
250  } else if (aggId == bestAggId &&
251  connectWeight(neigh) > bestConnect) {
252  bestConnect = connectWeight(neigh);
253  }
254  }
255  }
256  if (bestScore >= 0) {
257  aggStat(i) = AGGREGATED;
258  vertex2AggId(i, 0) = bestAggId;
259  procWinner(i, 0) = myRank;
260 
261  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
262  connectWeight(i) = bestConnect - penaltyConnectWeight;
263  tmpNumAggregated++;
264  }
265  },
266  numAggregated); // parallel_for
267  numNonAggregatedNodes -= numAggregated;
268  }
269  } // loop over maxIters
270 
271 } // BuildAggregatesRandom
272 
273 template <class LO, class GO, class Node>
276  const LWGraph_kokkos& graph,
277  Aggregates& aggregates,
279  LO& numNonAggregatedNodes) const {
280  using device_type = typename LWGraph_kokkos::device_type;
281  using execution_space = typename LWGraph_kokkos::execution_space;
282 
283  const LO numRows = graph.GetNodeNumVertices();
284  const int myRank = graph.GetComm()->getRank();
285 
286  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
287  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
288  auto colors = aggregates.GetGraphColors();
289  const LO numColors = aggregates.GetGraphNumColors();
290  LO numLocalAggregates = aggregates.GetNumAggregates();
291 
292  auto lclLWGraph = graph;
293 
294  const int defaultConnectWeight = 100;
295  const int penaltyConnectWeight = 10;
296 
297  Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
298  Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
299  Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
300  Kokkos::View<int*, device_type> aggPenalties("aggPenalties", numLocalAggregates);
301 
302  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
303 
304  // We do this cycle twice.
305  // I don't know why, but ML does it too
306  // taw: by running the aggregation routine more than once there is a chance that also
307  // non-aggregated nodes with a node distance of two are added to existing aggregates.
308  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
309  // should be sufficient.
310  int maxIters = 2;
311  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
312  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
313  maxIters = 1;
314  }
315  for (int iter = 0; iter < maxIters; ++iter) {
316  for (LO color = 1; color <= numColors; color++) {
317  Kokkos::deep_copy(aggWeight, 0);
318 
319  // the reduce counts how many nodes are aggregated by this phase,
320  // which will then be subtracted from numNonAggregatedNodes
321  LO numAggregated = 0;
322  Kokkos::parallel_for(
323  "Aggregation Phase 2b: updating agg weights",
324  Kokkos::RangePolicy<execution_space>(0, numRows),
325  KOKKOS_LAMBDA(const LO i) {
326  if (aggStat(i) != READY || colors(i) != color)
327  return;
328  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
329  for (int j = 0; j < neighOfINode.length; j++) {
330  LO neigh = neighOfINode(j);
331  // We don't check (neigh != i), as it is covered by checking
332  // (aggStat[neigh] == AGGREGATED)
333  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
334  aggStat(neigh) == AGGREGATED)
335  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
336  connectWeight(neigh));
337  }
338  });
339 
340  Kokkos::parallel_reduce(
341  "Aggregation Phase 2b: aggregates expansion",
342  Kokkos::RangePolicy<execution_space>(0, numRows),
343  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
344  if (aggStat(i) != READY || colors(i) != color)
345  return;
346  int bestScore = -100000;
347  int bestAggId = -1;
348  int bestConnect = -1;
349 
350  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
351  for (int j = 0; j < neighOfINode.length; j++) {
352  LO neigh = neighOfINode(j);
353 
354  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
355  aggStat(neigh) == AGGREGATED) {
356  auto aggId = vertex2AggId(neigh, 0);
357  int score = aggWeight(aggId) - aggPenalties(aggId);
358 
359  if (score > bestScore) {
360  bestAggId = aggId;
361  bestScore = score;
362  bestConnect = connectWeight(neigh);
363 
364  } else if (aggId == bestAggId &&
365  connectWeight(neigh) > bestConnect) {
366  bestConnect = connectWeight(neigh);
367  }
368  }
369  }
370  if (bestScore >= 0) {
371  aggStat(i) = AGGREGATED;
372  vertex2AggId(i, 0) = bestAggId;
373  procWinner(i, 0) = myRank;
374 
375  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
376  connectWeight(i) = bestConnect - penaltyConnectWeight;
377  tmpNumAggregated++;
378  }
379  },
380  numAggregated); // parallel_reduce
381 
382  Kokkos::parallel_for(
383  "Aggregation Phase 2b: updating agg penalties",
384  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
385  KOKKOS_LAMBDA(const LO agg) {
386  aggPenalties(agg) += aggPenaltyUpdates(agg);
387  aggPenaltyUpdates(agg) = 0;
388  });
389  numNonAggregatedNodes -= numAggregated;
390  }
391  } // loop over k
392 } // BuildAggregatesDeterministic
393 
394 } // namespace MueLu
395 
396 #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void BuildAggregatesDeterministic(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
TransListIter iter
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType