MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_LWGraph.hpp"
57 #include "MueLu_Aggregates.hpp"
58 #include "MueLu_Exceptions.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 #include "Kokkos_Sort.hpp"
62 
63 namespace MueLu {
64 
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
67  Monitor m(*this, "BuildAggregatesNonKokkos");
68 
69  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
70  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
71  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
72 
73  const LO numRows = graph.GetNodeNumVertices();
74  const int myRank = graph.GetComm()->getRank();
75 
76  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
77  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
78 
79  LO numLocalAggregates = aggregates.GetNumAggregates();
80 
81  LO numLocalNodes = procWinner.size();
82  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
83 
84  const double aggFactor = params.get<double>("aggregation: phase2a agg factor");
85  double factor;
86 
87  if (matchMLbehavior) {
88  // Note: ML uses global counts to set the factor
89  // Passing # of nonaggregated nodes and # of nodes via aggStat
90  GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()};
91  GO out_data[2];
92  Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data);
93  GO phase_one_aggregated = out_data[1] - out_data[0];
94  factor = as<double>(phase_one_aggregated) / (out_data[1] + 1);
95 
96  LO agg_stat_unaggregated = 0;
97  LO agg_stat_aggregated = 0;
98  LO agg_stat_bdry = 0;
99  for (LO i = 0; i < (LO)aggStat.size(); i++) {
100  if (aggStat[i] == AGGREGATED)
101  agg_stat_aggregated++;
102  else if (aggStat[i] == BOUNDARY)
103  agg_stat_bdry++;
104  else
105  agg_stat_unaggregated++;
106  }
107 
108  // NOTE: ML always uses 3 as minNodesPerAggregate
109  minNodesPerAggregate = 3;
110 
111  } else {
112  // MueLu defaults to using local counts to set the factor
113  factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
114  }
115 
116  // Now apply aggFactor
117  factor = pow(factor, aggFactor);
118 
119  int aggIndex = -1;
120  size_t aggSize = 0;
121  std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
122 
123  for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
124  if (aggStat[rootCandidate] != READY) {
125  continue;
126  }
127 
128  LO numNeighbors = 0;
129  aggSize = 0;
130  if (matchMLbehavior) {
131  aggList[aggSize++] = rootCandidate;
132  numNeighbors++;
133  }
134 
135  auto neighOfINode = graph.getNeighborVertices(rootCandidate);
136 
137  LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
138  for (int j = 0; j < neighOfINode.length; j++) {
139  LO neigh = neighOfINode(j);
140  if (graph.isLocalNeighborVertex(neigh))
141  num_local_neighbors++;
142 
143  if (neigh != rootCandidate) {
144  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
145  // If aggregate size does not exceed max size, add node to the tentative aggregate
146  // NOTE: We do not exit the loop over all neighbours since we have still
147  // to count all aggregated neighbour nodes for the aggregation criteria
148  // NOTE: We check here for the maximum aggregation size. If we would do it below
149  // with all the other check too big aggregates would not be accepted at all.
150  if (aggSize < as<size_t>(maxNodesPerAggregate))
151  aggList[aggSize++] = neigh;
152  num_nonaggd_neighbors++;
153  }
154 
155  numNeighbors++;
156  }
157  }
158 
159  bool accept_aggregate;
160  if (matchMLbehavior) {
161  // ML does this calculation slightly differently than MueLu does by default, specifically it
162  // uses the *local* number of neigbors, regardless of what they are.
163  // NOTE: ML does zero compression here. Not sure if it matters
164  // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. This has been set above
165  LO rowi_N = num_local_neighbors;
166  num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor
167  accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
168  } else {
169  accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
170  }
171 
172  if (accept_aggregate) {
173  // Accept new aggregate
174  // rootCandidate becomes the root of the newly formed aggregate
175  aggregates.SetIsRoot(rootCandidate);
176  aggIndex = numLocalAggregates++;
177 
178  for (size_t k = 0; k < aggSize; k++) {
179  aggStat[aggList[k]] = AGGREGATED;
180  vertex2AggId[aggList[k]] = aggIndex;
181  procWinner[aggList[k]] = myRank;
182  }
183 
184  numNonAggregatedNodes -= aggSize;
185  }
186  }
187 
188  // update aggregate object
189  aggregates.SetNumAggregates(numLocalAggregates);
190 }
191 
192 template <class LocalOrdinal, class GlobalOrdinal, class Node>
195  const LWGraph_kokkos& graph,
196  Aggregates& aggregates,
198  LO& numNonAggregatedNodes) const {
199  if (params.get<bool>("aggregation: deterministic")) {
200  Monitor m(*this, "BuildAggregatesDeterministic");
201  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
202  } else {
203  Monitor m(*this, "BuildAggregatesRandom");
204  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
205  }
206 
207 } // BuildAggregates
208 
209 template <class LO, class GO, class Node>
212  const LWGraph_kokkos& graph,
213  Aggregates& aggregates,
215  LO& numNonAggregatedNodes) const {
216  using device_type = typename LWGraph_kokkos::device_type;
217  using execution_space = typename LWGraph_kokkos::execution_space;
218 
219  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
220  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
221  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
222 
223  const LO numRows = graph.GetNodeNumVertices();
224  const int myRank = graph.GetComm()->getRank();
225 
226  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
227  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
228  auto colors = aggregates.GetGraphColors();
229  const LO numColors = aggregates.GetGraphNumColors();
230 
231  auto lclLWGraph = graph;
232 
233  LO numLocalNodes = numRows;
234  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
235 
236  const double aggFactor = 0.5;
237  double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
238  factor = pow(factor, aggFactor);
239 
240  // LBV on Sept 12, 2019: this looks a little heavy handed,
241  // I'm not sure a view is needed to perform atomic updates.
242  // If we can avoid this and use a simple LO that would be
243  // simpler for later maintenance.
244  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
245  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
246  Kokkos::create_mirror_view(numLocalAggregates);
247  h_numLocalAggregates() = aggregates.GetNumAggregates();
248  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
249 
250  // Now we create new aggregates using root nodes in all colors other than the first color,
251  // as the first color was already exhausted in Phase 1.
252  for (int color = 2; color < numColors + 1; ++color) {
253  LO tmpNumNonAggregatedNodes = 0;
254  Kokkos::parallel_reduce(
255  "Aggregation Phase 2a: loop over each individual color",
256  Kokkos::RangePolicy<execution_space>(0, numRows),
257  KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
258  if (aggStat(rootCandidate) == READY &&
259  colors(rootCandidate) == color) {
260  LO numNeighbors = 0;
261  LO aggSize = 0;
262  if (matchMLbehavior) {
263  aggSize += 1;
264  numNeighbors += 1;
265  }
266 
267  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
268 
269  // Loop over neighbors to count how many nodes could join
270  // the new aggregate
271 
272  for (int j = 0; j < neighbors.length; ++j) {
273  LO neigh = neighbors(j);
274  if (neigh != rootCandidate) {
275  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
276  (aggStat(neigh) == READY) &&
277  (aggSize < maxNodesPerAggregate)) {
278  ++aggSize;
279  }
280  ++numNeighbors;
281  }
282  }
283 
284  // If a sufficient number of nodes can join the new aggregate
285  // then we actually create the aggregate.
286  if (aggSize > minNodesPerAggregate &&
287  (aggSize > factor * numNeighbors)) {
288  // aggregates.SetIsRoot(rootCandidate);
289  LO aggIndex = Kokkos::
290  atomic_fetch_add(&numLocalAggregates(), 1);
291 
292  LO numAggregated = 0;
293 
294  if (matchMLbehavior) {
295  // Add the root.
296  aggStat(rootCandidate) = AGGREGATED;
297  vertex2AggId(rootCandidate, 0) = aggIndex;
298  procWinner(rootCandidate, 0) = myRank;
299  ++numAggregated;
300  --lNumNonAggregatedNodes;
301  }
302 
303  for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
304  LO neigh = neighbors(neighIdx);
305  if (neigh != rootCandidate) {
306  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
307  (aggStat(neigh) == READY) &&
308  (numAggregated < aggSize)) {
309  aggStat(neigh) = AGGREGATED;
310  vertex2AggId(neigh, 0) = aggIndex;
311  procWinner(neigh, 0) = myRank;
312 
313  ++numAggregated;
314  --lNumNonAggregatedNodes;
315  }
316  }
317  }
318  }
319  }
320  },
321  tmpNumNonAggregatedNodes);
322  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
323  }
324 
325  // update aggregate object
326  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
327  aggregates.SetNumAggregates(h_numLocalAggregates());
328 } // BuildAggregatesRandom
329 
330 template <class LO, class GO, class Node>
333  const LWGraph_kokkos& graph,
334  Aggregates& aggregates,
336  LO& numNonAggregatedNodes) const {
337  using device_type = typename LWGraph_kokkos::device_type;
338  using execution_space = typename LWGraph_kokkos::execution_space;
339 
340  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
341  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
342 
343  const LO numRows = graph.GetNodeNumVertices();
344  const int myRank = graph.GetComm()->getRank();
345 
346  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
347  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
348  auto colors = aggregates.GetGraphColors();
349  const LO numColors = aggregates.GetGraphNumColors();
350 
351  auto lclLWGraph = graph;
352 
353  LO numLocalNodes = procWinner.size();
354  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
355 
356  const double aggFactor = 0.5;
357  double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
358  factor = pow(factor, aggFactor);
359 
360  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
361  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
362  Kokkos::create_mirror_view(numLocalAggregates);
363  h_numLocalAggregates() = aggregates.GetNumAggregates();
364  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
365 
366  // Now we create new aggregates using root nodes in all colors other than the first color,
367  // as the first color was already exhausted in Phase 1.
368  //
369  // In the deterministic version, exactly the same set of aggregates will be created
370  // (as the nondeterministic version)
371  // because no vertex V can be a neighbor of two vertices of the same color, so two root
372  // candidates can't fight over V
373  //
374  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
375  // roots of each color before assigning aggregate IDs
376 
377  // numNonAggregatedNodes is the best available upper bound for the number of aggregates
378  // which may be created in this phase, so use it for the size of newRoots
379  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
380  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
381  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
382  for (int color = 1; color < numColors + 1; ++color) {
383  h_numNewRoots() = 0;
384  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
385  Kokkos::parallel_for(
386  "Aggregation Phase 2a: determining new roots of current color",
387  Kokkos::RangePolicy<execution_space>(0, numRows),
388  KOKKOS_LAMBDA(const LO rootCandidate) {
389  if (aggStat(rootCandidate) == READY &&
390  colors(rootCandidate) == color) {
391  LO aggSize = 0;
392  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
393  // Loop over neighbors to count how many nodes could join
394  // the new aggregate
395  LO numNeighbors = 0;
396  for (int j = 0; j < neighbors.length; ++j) {
397  LO neigh = neighbors(j);
398  if (neigh != rootCandidate) {
399  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
400  aggStat(neigh) == READY &&
401  aggSize < maxNodesPerAggregate) {
402  ++aggSize;
403  }
404  ++numNeighbors;
405  }
406  }
407  // If a sufficient number of nodes can join the new aggregate
408  // then we mark rootCandidate as a future root.
409  if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
410  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
411  newRoots(newRootIndex) = rootCandidate;
412  }
413  }
414  });
415  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
416 
417  if (h_numNewRoots() > 0) {
418  // sort the new root indices
419  Kokkos::sort(newRoots, 0, h_numNewRoots());
420  // now, loop over all new roots again and actually create the aggregates
421  LO tmpNumNonAggregatedNodes = 0;
422  // First, just find the set of color vertices which will become aggregate roots
423  Kokkos::parallel_reduce(
424  "Aggregation Phase 2a: create new aggregates",
425  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
426  KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
427  LO root = newRoots(newRootIndex);
428  LO newAggID = numLocalAggregates() + newRootIndex;
429  auto neighbors = lclLWGraph.getNeighborVertices(root);
430  // Loop over neighbors and add them to new aggregate
431  aggStat(root) = AGGREGATED;
432  vertex2AggId(root, 0) = newAggID;
433  LO aggSize = 1;
434  for (int j = 0; j < neighbors.length; ++j) {
435  LO neigh = neighbors(j);
436  if (neigh != root) {
437  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
438  aggStat(neigh) == READY &&
439  aggSize < maxNodesPerAggregate) {
440  aggStat(neigh) = AGGREGATED;
441  vertex2AggId(neigh, 0) = newAggID;
442  procWinner(neigh, 0) = myRank;
443  aggSize++;
444  }
445  }
446  }
447  lNumNonAggregatedNodes -= aggSize;
448  },
449  tmpNumNonAggregatedNodes);
450  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
451  h_numLocalAggregates() += h_numNewRoots();
452  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
453  }
454  }
455  aggregates.SetNumAggregates(h_numLocalAggregates());
456 }
457 
458 } // namespace MueLu
459 
460 #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
size_type size() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.