MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_Aggregates.hpp"
57 #include "MueLu_Exceptions.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 #include "Kokkos_Sort.hpp"
62 
63 namespace MueLu {
64 
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  const LWGraph_kokkos& graph,
69  Aggregates& aggregates,
70  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71  LO& numNonAggregatedNodes) const {
72  if (params.get<bool>("aggregation: deterministic")) {
73  Monitor m(*this, "BuildAggregatesDeterministic");
74  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
75  } else {
76  Monitor m(*this, "BuildAggregatesRandom");
77  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  }
79 
80 } // BuildAggregates
81 
82 template <class LO, class GO, class Node>
85  const LWGraph_kokkos& graph,
86  Aggregates& aggregates,
87  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
88  LO& numNonAggregatedNodes) const {
89  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
90  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
91  bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
92 
93  const LO numRows = graph.GetNodeNumVertices();
94  const int myRank = graph.GetComm()->getRank();
95 
96  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
97  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
98  auto colors = aggregates.GetGraphColors();
99  const LO numColors = aggregates.GetGraphNumColors();
100 
101  auto lclLWGraph = graph;
102 
103  LO numLocalNodes = numRows;
104  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
105 
106  const double aggFactor = 0.5;
107  double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
108  factor = pow(factor, aggFactor);
109 
110  // LBV on Sept 12, 2019: this looks a little heavy handed,
111  // I'm not sure a view is needed to perform atomic updates.
112  // If we can avoid this and use a simple LO that would be
113  // simpler for later maintenance.
114  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
115  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
116  Kokkos::create_mirror_view(numLocalAggregates);
117  h_numLocalAggregates() = aggregates.GetNumAggregates();
118  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
119 
120  // Now we create new aggregates using root nodes in all colors other than the first color,
121  // as the first color was already exhausted in Phase 1.
122  for (int color = 2; color < numColors + 1; ++color) {
123  LO tmpNumNonAggregatedNodes = 0;
124  Kokkos::parallel_reduce(
125  "Aggregation Phase 2a: loop over each individual color",
126  Kokkos::RangePolicy<execution_space>(0, numRows),
127  KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
128  if (aggStat(rootCandidate) == READY &&
129  colors(rootCandidate) == color) {
130  LO numNeighbors = 0;
131  LO aggSize = 0;
132  if (matchMLbehavior) {
133  aggSize += 1;
134  numNeighbors += 1;
135  }
136 
137  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
138 
139  // Loop over neighbors to count how many nodes could join
140  // the new aggregate
141 
142  for (int j = 0; j < neighbors.length; ++j) {
143  LO neigh = neighbors(j);
144  if (neigh != rootCandidate) {
145  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
146  (aggStat(neigh) == READY) &&
147  (aggSize < maxNodesPerAggregate)) {
148  ++aggSize;
149  }
150  ++numNeighbors;
151  }
152  }
153 
154  // If a sufficient number of nodes can join the new aggregate
155  // then we actually create the aggregate.
156  if (aggSize > minNodesPerAggregate &&
157  (aggSize > factor * numNeighbors)) {
158  // aggregates.SetIsRoot(rootCandidate);
159  LO aggIndex = Kokkos::
160  atomic_fetch_add(&numLocalAggregates(), 1);
161 
162  LO numAggregated = 0;
163 
164  if (matchMLbehavior) {
165  // Add the root.
166  aggStat(rootCandidate) = AGGREGATED;
167  vertex2AggId(rootCandidate, 0) = aggIndex;
168  procWinner(rootCandidate, 0) = myRank;
169  ++numAggregated;
170  --lNumNonAggregatedNodes;
171  }
172 
173  for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
174  LO neigh = neighbors(neighIdx);
175  if (neigh != rootCandidate) {
176  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
177  (aggStat(neigh) == READY) &&
178  (numAggregated < aggSize)) {
179  aggStat(neigh) = AGGREGATED;
180  vertex2AggId(neigh, 0) = aggIndex;
181  procWinner(neigh, 0) = myRank;
182 
183  ++numAggregated;
184  --lNumNonAggregatedNodes;
185  }
186  }
187  }
188  }
189  }
190  },
191  tmpNumNonAggregatedNodes);
192  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
193  }
194 
195  // update aggregate object
196  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
197  aggregates.SetNumAggregates(h_numLocalAggregates());
198 } // BuildAggregatesRandom
199 
200 template <class LO, class GO, class Node>
203  const LWGraph_kokkos& graph,
204  Aggregates& aggregates,
205  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
206  LO& numNonAggregatedNodes) const {
207  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
208  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
209 
210  const LO numRows = graph.GetNodeNumVertices();
211  const int myRank = graph.GetComm()->getRank();
212 
213  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
214  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
215  auto colors = aggregates.GetGraphColors();
216  const LO numColors = aggregates.GetGraphNumColors();
217 
218  auto lclLWGraph = graph;
219 
220  LO numLocalNodes = procWinner.size();
221  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
222 
223  const double aggFactor = 0.5;
224  double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
225  factor = pow(factor, aggFactor);
226 
227  Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
228  typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
229  Kokkos::create_mirror_view(numLocalAggregates);
230  h_numLocalAggregates() = aggregates.GetNumAggregates();
231  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
232 
233  // Now we create new aggregates using root nodes in all colors other than the first color,
234  // as the first color was already exhausted in Phase 1.
235  //
236  // In the deterministic version, exactly the same set of aggregates will be created
237  // (as the nondeterministic version)
238  // because no vertex V can be a neighbor of two vertices of the same color, so two root
239  // candidates can't fight over V
240  //
241  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
242  // roots of each color before assigning aggregate IDs
243 
244  // numNonAggregatedNodes is the best available upper bound for the number of aggregates
245  // which may be created in this phase, so use it for the size of newRoots
246  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
247  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
248  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
249  for (int color = 1; color < numColors + 1; ++color) {
250  h_numNewRoots() = 0;
251  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
252  Kokkos::parallel_for(
253  "Aggregation Phase 2a: determining new roots of current color",
254  Kokkos::RangePolicy<execution_space>(0, numRows),
255  KOKKOS_LAMBDA(const LO rootCandidate) {
256  if (aggStat(rootCandidate) == READY &&
257  colors(rootCandidate) == color) {
258  LO aggSize = 0;
259  auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
260  // Loop over neighbors to count how many nodes could join
261  // the new aggregate
262  LO numNeighbors = 0;
263  for (int j = 0; j < neighbors.length; ++j) {
264  LO neigh = neighbors(j);
265  if (neigh != rootCandidate) {
266  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
267  aggStat(neigh) == READY &&
268  aggSize < maxNodesPerAggregate) {
269  ++aggSize;
270  }
271  ++numNeighbors;
272  }
273  }
274  // If a sufficient number of nodes can join the new aggregate
275  // then we mark rootCandidate as a future root.
276  if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
277  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
278  newRoots(newRootIndex) = rootCandidate;
279  }
280  }
281  });
282  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
283 
284  if (h_numNewRoots() > 0) {
285  // sort the new root indices
286  Kokkos::sort(newRoots, 0, h_numNewRoots());
287  // now, loop over all new roots again and actually create the aggregates
288  LO tmpNumNonAggregatedNodes = 0;
289  // First, just find the set of color vertices which will become aggregate roots
290  Kokkos::parallel_reduce(
291  "Aggregation Phase 2a: create new aggregates",
292  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
293  KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
294  LO root = newRoots(newRootIndex);
295  LO newAggID = numLocalAggregates() + newRootIndex;
296  auto neighbors = lclLWGraph.getNeighborVertices(root);
297  // Loop over neighbors and add them to new aggregate
298  aggStat(root) = AGGREGATED;
299  vertex2AggId(root, 0) = newAggID;
300  LO aggSize = 1;
301  for (int j = 0; j < neighbors.length; ++j) {
302  LO neigh = neighbors(j);
303  if (neigh != root) {
304  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
305  aggStat(neigh) == READY &&
306  aggSize < maxNodesPerAggregate) {
307  aggStat(neigh) = AGGREGATED;
308  vertex2AggId(neigh, 0) = newAggID;
309  procWinner(neigh, 0) = myRank;
310  aggSize++;
311  }
312  }
313  }
314  lNumNonAggregatedNodes -= aggSize;
315  },
316  tmpNumNonAggregatedNodes);
317  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
318  h_numLocalAggregates() += h_numNewRoots();
319  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
320  }
321  }
322  aggregates.SetNumAggregates(h_numLocalAggregates());
323 }
324 
325 } // namespace MueLu
326 
327 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
const RCP< const Teuchos::Comm< int > > GetComm() const
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.