MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase1Algorithm_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
12 
13 #include <queue>
14 
15 #include <Teuchos_Comm.hpp>
16 #include <Teuchos_CommHelpers.hpp>
17 
18 #include <Xpetra_Vector.hpp>
19 
21 
22 #include "MueLu_LWGraph.hpp"
23 #include "MueLu_Aggregates.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Monitor.hpp"
26 
27 #include "Kokkos_Sort.hpp"
28 #include <Kokkos_ScatterView.hpp>
29 
30 namespace MueLu {
31 
32 template <class LocalOrdinal, class GlobalOrdinal, class Node>
35  LO& numNonAggregatedNodes) const {
36  Monitor m(*this, "BuildAggregatesNonKokkos");
37 
38  std::string orderingStr = params.get<std::string>("aggregation: ordering");
39  int maxNeighAlreadySelected = params.get<int>("aggregation: max selected neighbors");
40  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
41  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
42  bool matchMLBehavior = params.get<bool>("aggregation: match ML phase1");
43 
44  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError,
45  "MueLu::UncoupledAggregationAlgorithm::BuildAggregatesNonKokkos: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
46 
47  enum {
48  O_NATURAL,
49  O_RANDOM,
50  O_GRAPH
51  } ordering;
52  ordering = O_NATURAL; // initialize variable (fix CID 143665)
53  if (orderingStr == "natural") ordering = O_NATURAL;
54  if (orderingStr == "random") ordering = O_RANDOM;
55  if (orderingStr == "graph") ordering = O_GRAPH;
56 
57  const LO numRows = graph.GetNodeNumVertices();
58  const int myRank = graph.GetComm()->getRank();
59 
60  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
61  ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
62 
63  LO numLocalAggregates = aggregates.GetNumAggregates();
64 
65  ArrayRCP<LO> randomVector;
66  if (ordering == O_RANDOM) {
67  randomVector = arcp<LO>(numRows);
68  for (LO i = 0; i < numRows; i++)
69  randomVector[i] = i;
70  RandomReorder(randomVector);
71  }
72 
73  int aggIndex = -1;
74  size_t aggSize = 0;
75  std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
76 
77  std::queue<LO> graphOrderQueue;
78 
79  // Main loop over all local rows of graph(A)
80  for (LO i = 0; i < numRows; i++) {
81  // Step 1: pick the next node to aggregate
82  LO rootCandidate = 0;
83  if (ordering == O_NATURAL)
84  rootCandidate = i;
85  else if (ordering == O_RANDOM)
86  rootCandidate = randomVector[i];
87  else if (ordering == O_GRAPH) {
88  if (graphOrderQueue.size() == 0) {
89  // Current queue is empty for "graph" ordering, populate with one READY node
90  for (LO jnode = 0; jnode < numRows; jnode++)
91  if (aggStat[jnode] == READY) {
92  graphOrderQueue.push(jnode);
93  break;
94  }
95  }
96  if (graphOrderQueue.size() == 0) {
97  // There are no more ready nodes, end the phase
98  break;
99  }
100  rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue
101  graphOrderQueue.pop(); // delete this node in list
102  }
103 
104  if (aggStat[rootCandidate] != READY)
105  continue;
106 
107  // Step 2: build tentative aggregate
108  aggSize = 0;
109  aggList[aggSize++] = rootCandidate;
110 
111  auto neighOfINode = graph.getNeighborVertices(rootCandidate);
112 
113  // If the number of neighbors is less than the minimum number of nodes
114  // per aggregate, we know this is not going to be a valid root, and we
115  // may skip it, but only for "natural" and "random" (for "graph" we still
116  // need to fetch the list of local neighbors to continue)
117  if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
118  neighOfINode.length < minNodesPerAggregate) {
119  continue;
120  }
121 
122  LO numAggregatedNeighbours = 0;
123 
124  for (int j = 0; j < neighOfINode.length; j++) {
125  LO neigh = neighOfINode(j);
126 
127  if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
128  if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) {
129  // If aggregate size does not exceed max size, add node to the
130  // tentative aggregate
131  // NOTE: We do not exit the loop over all neighbours since we have
132  // still to count all aggregated neighbour nodes for the
133  // aggregation criteria
134  // NOTE: We check here for the maximum aggregation size. If we
135  // would do it below with all the other check too big aggregates
136  // would not be accepted at all.
137  if (aggSize < as<size_t>(maxNodesPerAggregate))
138  aggList[aggSize++] = neigh;
139 
140  } else if (!matchMLBehavior || aggStat[neigh] != IGNORED) {
141  // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by
142  // the time we get to Phase 1, so we check IGNORED instead
143  numAggregatedNeighbours++;
144  }
145  }
146  }
147 
148  // Step 3: check if tentative aggregate is acceptable
149  if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates
150  (aggSize >= as<size_t>(minNodesPerAggregate))) { // too few nodes in the tentative aggregate
151  // Accept new aggregate
152  // rootCandidate becomes the root of the newly formed aggregate
153  aggregates.SetIsRoot(rootCandidate);
154  aggIndex = numLocalAggregates++;
155 
156  for (size_t k = 0; k < aggSize; k++) {
157  aggStat[aggList[k]] = AGGREGATED;
158  vertex2AggId[aggList[k]] = aggIndex;
159  procWinner[aggList[k]] = myRank;
160  }
161 
162  numNonAggregatedNodes -= aggSize;
163 
164  } else {
165  // Aggregate is not accepted
166  aggStat[rootCandidate] = NOTSEL;
167 
168  // Need this for the "graph" ordering below
169  // The original candidate is always aggList[0]
170  aggSize = 1;
171  }
172 
173  if (ordering == O_GRAPH) {
174  // Add candidates to the list of nodes
175  // NOTE: the code have slightly different meanings depending on context:
176  // - if aggregate was accepted, we add neighbors of neighbors of the original candidate
177  // - if aggregate was not accepted, we add neighbors of the original candidate
178  for (size_t k = 0; k < aggSize; k++) {
179  auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
180 
181  for (int j = 0; j < neighOfJNode.length; j++) {
182  LO neigh = neighOfJNode(j);
183 
184  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY)
185  graphOrderQueue.push(neigh);
186  }
187  }
188  }
189  }
190 
191  // Reset all NOTSEL vertices to READY
192  // This simplifies other algorithms
193  for (LO i = 0; i < numRows; i++)
194  if (aggStat[i] == NOTSEL)
195  aggStat[i] = READY;
196 
197  // update aggregate object
198  aggregates.SetNumAggregates(numLocalAggregates);
199 }
200 
201 template <class LocalOrdinal, class GlobalOrdinal, class Node>
203  // TODO: replace int
204  int n = list.size();
205  for (int i = 0; i < n - 1; i++)
206  std::swap(list[i], list[RandomOrdinal(i, n - 1)]);
207 }
208 
209 template <class LocalOrdinal, class GlobalOrdinal, class Node>
211  return min + as<int>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
212 }
213 
214 template <class LocalOrdinal, class GlobalOrdinal, class Node>
217  const LWGraph_kokkos& graph,
218  Aggregates& aggregates,
220  LO& numNonAggregatedNodes) const {
221  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
222  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
223 
224  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
226  "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
227 
228  // Distance-2 gives less control than serial uncoupled phase 1
229  // no custom row reordering because would require making deep copy
230  // of local matrix entries and permuting it can only enforce
231  // max aggregate size
232  {
233  if (params.get<bool>("aggregation: deterministic")) {
234  Monitor m(*this, "BuildAggregatesDeterministic");
235  BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
236  aggregates, aggStat, numNonAggregatedNodes);
237  } else {
238  Monitor m(*this, "BuildAggregatesRandom");
239  BuildAggregatesRandom(maxNodesPerAggregate, graph,
240  aggregates, aggStat, numNonAggregatedNodes);
241  }
242  }
243 }
244 
245 template <class LocalOrdinal, class GlobalOrdinal, class Node>
247  BuildAggregatesRandom(const LO maxAggSize,
248  const LWGraph_kokkos& graph,
249  Aggregates& aggregates,
251  LO& numNonAggregatedNodes) const {
252  using device_type = typename LWGraph_kokkos::device_type;
253  using execution_space = typename LWGraph_kokkos::execution_space;
254 
255  const LO numRows = graph.GetNodeNumVertices();
256  const int myRank = graph.GetComm()->getRank();
257 
258  // Extract data from aggregates
259  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
260  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
261  auto colors = aggregates.GetGraphColors();
262 
263  auto lclLWGraph = graph;
264 
265  LO numAggregatedNodes = 0;
266  LO numLocalAggregates = aggregates.GetNumAggregates();
267  Kokkos::View<LO, device_type> aggCount("aggCount");
268  Kokkos::deep_copy(aggCount, numLocalAggregates);
269  Kokkos::parallel_for(
270  "Aggregation Phase 1: initial reduction over color == 1",
271  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
272  KOKKOS_LAMBDA(const LO nodeIdx) {
273  if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
274  const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
275  vertex2AggId(nodeIdx, 0) = aggIdx;
276  aggStat(nodeIdx) = AGGREGATED;
277  procWinner(nodeIdx, 0) = myRank;
278  }
279  });
280  // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
281  // before updating the value of numLocalAggregates.
282  // But since we also do not want to create a host mirror of aggCount we do some trickery...
283  numAggregatedNodes -= numLocalAggregates;
284  Kokkos::deep_copy(numLocalAggregates, aggCount);
285  numAggregatedNodes += numLocalAggregates;
286 
287  // Compute the initial size of the aggregates.
288  // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
289  // at this point so we could simplify the code below a lot if this
290  // assumption is correct...
291  Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
292  {
293  // Here there is a possibility that two vertices assigned to two different threads contribute
294  // to the same aggregate if somethings happened before phase 1?
295  auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
296  Kokkos::parallel_for(
297  "Aggregation Phase 1: compute initial aggregates size",
298  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
299  KOKKOS_LAMBDA(const LO nodeIdx) {
300  auto aggSizesScatterViewAccess = aggSizesScatterView.access();
301  if (vertex2AggId(nodeIdx, 0) >= 0)
302  aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
303  });
304  Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
305  }
306 
307  LO tmpNumAggregatedNodes = 0;
308  Kokkos::parallel_reduce(
309  "Aggregation Phase 1: main parallel_reduce over aggSizes",
310  Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
311  KOKKOS_LAMBDA(const size_t nodeIdx, LO& lNumAggregatedNodes) {
312  if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
313  // Get neighbors of vertex i and look for local, aggregated,
314  // color 1 neighbor (valid root).
315  auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
316  for (LO j = 0; j < neighbors.length; ++j) {
317  auto nei = neighbors.colidx(j);
318  if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) == AGGREGATED) {
319  // This atomic guarentees that any other node trying to
320  // join aggregate agg has the correct size.
321  LO agg = vertex2AggId(nei, 0);
322  const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
323  1);
324  if (aggSize < maxAggSize) {
325  // assign vertex i to aggregate with root j
326  vertex2AggId(nodeIdx, 0) = agg;
327  procWinner(nodeIdx, 0) = myRank;
328  aggStat(nodeIdx) = AGGREGATED;
329  ++lNumAggregatedNodes;
330  break;
331  } else {
332  // Decrement back the value of aggSizesView(agg)
333  Kokkos::atomic_dec(&aggSizesView(agg));
334  }
335  }
336  }
337  }
338  // if(aggStat(nodeIdx) != AGGREGATED) {
339  // lNumNonAggregatedNodes++;
340  if (aggStat(nodeIdx) == NOTSEL) {
341  aggStat(nodeIdx) = READY;
342  }
343  // }
344  },
345  tmpNumAggregatedNodes);
346  numAggregatedNodes += tmpNumAggregatedNodes;
347  numNonAggregatedNodes -= numAggregatedNodes;
348 
349  // update aggregate object
350  aggregates.SetNumAggregates(numLocalAggregates);
351 }
352 
353 template <class LocalOrdinal, class GlobalOrdinal, class Node>
356  const LWGraph_kokkos& graph,
357  Aggregates& aggregates,
359  LO& numNonAggregatedNodes) const {
360  using device_type = typename LWGraph_kokkos::device_type;
361  using execution_space = typename LWGraph_kokkos::execution_space;
362 
363  const LO numRows = graph.GetNodeNumVertices();
364  const int myRank = graph.GetComm()->getRank();
365 
366  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
367  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
368  auto colors = aggregates.GetGraphColors();
369 
370  auto lclLWGraph = graph;
371 
372  LO numLocalAggregates = aggregates.GetNumAggregates();
373  Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
374  {
375  auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
376  h_nla() = numLocalAggregates;
377  Kokkos::deep_copy(numLocalAggregatesView, h_nla);
378  }
379 
380  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
381  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
382  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
383 
384  // first loop build the set of new roots
385  Kokkos::parallel_for(
386  "Aggregation Phase 1: building list of new roots",
387  Kokkos::RangePolicy<execution_space>(0, numRows),
388  KOKKOS_LAMBDA(const LO i) {
389  if (colors(i) == 1 && aggStat(i) == READY) {
390  // i will become a root
391  newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
392  }
393  });
394  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
395  // sort new roots by LID to guarantee determinism in agg IDs
396  Kokkos::sort(newRoots, 0, h_numNewRoots());
397  LO numAggregated = 0;
398  Kokkos::parallel_reduce(
399  "Aggregation Phase 1: aggregating nodes",
400  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401  KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) {
402  LO root = newRoots(rootIndex);
403  LO aggID = numLocalAggregatesView() + rootIndex;
404  LO aggSize = 1;
405  vertex2AggId(root, 0) = aggID;
406  procWinner(root, 0) = myRank;
407  aggStat(root) = AGGREGATED;
408  auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
409  for (LO n = 0; n < neighOfRoot.length; n++) {
410  LO neigh = neighOfRoot(n);
411  if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) {
412  // add neigh to aggregate
413  vertex2AggId(neigh, 0) = aggID;
414  procWinner(neigh, 0) = myRank;
415  aggStat(neigh) = AGGREGATED;
416  aggSize++;
417  if (aggSize == maxAggSize) {
418  // can't add any more nodes
419  break;
420  }
421  }
422  }
423  lnumAggregated += aggSize;
424  },
425  numAggregated);
426  numNonAggregatedNodes -= numAggregated;
427  // update aggregate object
428  aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
429 }
430 
431 } // namespace MueLu
432 
433 #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
size_type size() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id &#39;v&#39; is on current process.
void RandomReorder(ArrayRCP< LO > list) const
Utility to take a list of integers and reorder them randomly (by using a local permutation).
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
Lightweight MueLu representation of a compressed row storage graph.
Exception throws to report errors in the internal logical of the program.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
int RandomOrdinal(int min, int max) const
Generate a random number in the range [min, max].
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.