MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
51 
52 #include <Xpetra_Vector.hpp>
53 
55 
56 #include "MueLu_Aggregates.hpp"
57 #include "MueLu_Exceptions.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 namespace MueLu {
62 
63 // Try to stick unaggregated nodes into a neighboring aggregate if they are
64 // not already too big
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  const LWGraph_kokkos& graph,
69  Aggregates& aggregates,
70  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71  LO& numNonAggregatedNodes) const {
72  if (params.get<bool>("aggregation: deterministic")) {
73  Monitor m(*this, "BuildAggregatesDeterministic");
74  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
75  } else {
76  Monitor m(*this, "BuildAggregatesRandom");
77  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  }
79 
80 } // BuildAggregates
81 
82 template <class LO, class GO, class Node>
85  const LWGraph_kokkos& graph,
86  Aggregates& aggregates,
87  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
88  LO& numNonAggregatedNodes) const {
89  const LO numRows = graph.GetNodeNumVertices();
90  const int myRank = graph.GetComm()->getRank();
91 
92  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
93  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
94  auto colors = aggregates.GetGraphColors();
95  const LO numColors = aggregates.GetGraphNumColors();
96  const LO numLocalAggregates = aggregates.GetNumAggregates();
97 
98  auto lclLWGraph = graph;
99 
100  const LO defaultConnectWeight = 100;
101  const LO penaltyConnectWeight = 10;
102 
103  Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
104  Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
105  Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
106 
107  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
108 
109  // taw: by running the aggregation routine more than once there is a chance that also
110  // non-aggregated nodes with a node distance of two are added to existing aggregates.
111  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
112  // should be sufficient.
113  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
114  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
115  // is needed to reach distance 2 neighbors.
116  int maxIters = 2;
117  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
118  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
119  maxIters = 1;
120  }
121  for (int iter = 0; iter < maxIters; ++iter) {
122  for (LO color = 1; color <= numColors; ++color) {
123  Kokkos::deep_copy(aggWeight, 0);
124 
125  // the reduce counts how many nodes are aggregated by this phase,
126  // which will then be subtracted from numNonAggregatedNodes
127  LO numAggregated = 0;
128  Kokkos::parallel_reduce(
129  "Aggregation Phase 2b: aggregates expansion",
130  Kokkos::RangePolicy<execution_space>(0, numRows),
131  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
132  if (aggStat(i) != READY || colors(i) != color)
133  return;
134 
135  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
136  for (int j = 0; j < neighOfINode.length; j++) {
137  LO neigh = neighOfINode(j);
138 
139  // We don't check (neigh != i), as it is covered by checking
140  // (aggStat[neigh] == AGGREGATED)
141  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
142  aggStat(neigh) == AGGREGATED)
143  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
144  connectWeight(neigh));
145  }
146 
147  int bestScore = -100000;
148  int bestAggId = -1;
149  int bestConnect = -1;
150 
151  for (int j = 0; j < neighOfINode.length; j++) {
152  LO neigh = neighOfINode(j);
153 
154  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
155  aggStat(neigh) == AGGREGATED) {
156  auto aggId = vertex2AggId(neigh, 0);
157  int score = aggWeight(aggId) - aggPenalties(aggId);
158 
159  if (score > bestScore) {
160  bestAggId = aggId;
161  bestScore = score;
162  bestConnect = connectWeight(neigh);
163 
164  } else if (aggId == bestAggId &&
165  connectWeight(neigh) > bestConnect) {
166  bestConnect = connectWeight(neigh);
167  }
168  }
169  }
170  if (bestScore >= 0) {
171  aggStat(i) = AGGREGATED;
172  vertex2AggId(i, 0) = bestAggId;
173  procWinner(i, 0) = myRank;
174 
175  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
176  connectWeight(i) = bestConnect - penaltyConnectWeight;
177  tmpNumAggregated++;
178  }
179  },
180  numAggregated); // parallel_for
181  numNonAggregatedNodes -= numAggregated;
182  }
183  } // loop over maxIters
184 
185 } // BuildAggregatesRandom
186 
187 template <class LO, class GO, class Node>
190  const LWGraph_kokkos& graph,
191  Aggregates& aggregates,
192  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
193  LO& numNonAggregatedNodes) const {
194  const LO numRows = graph.GetNodeNumVertices();
195  const int myRank = graph.GetComm()->getRank();
196 
197  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
198  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
199  auto colors = aggregates.GetGraphColors();
200  const LO numColors = aggregates.GetGraphNumColors();
201  LO numLocalAggregates = aggregates.GetNumAggregates();
202 
203  auto lclLWGraph = graph;
204 
205  const int defaultConnectWeight = 100;
206  const int penaltyConnectWeight = 10;
207 
208  Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
209  Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop
210  Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
211  Kokkos::View<int*, device_type> aggPenalties("aggPenalties", numLocalAggregates);
212 
213  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
214 
215  // We do this cycle twice.
216  // I don't know why, but ML does it too
217  // taw: by running the aggregation routine more than once there is a chance that also
218  // non-aggregated nodes with a node distance of two are added to existing aggregates.
219  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
220  // should be sufficient.
221  int maxIters = 2;
222  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
223  if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
224  maxIters = 1;
225  }
226  for (int iter = 0; iter < maxIters; ++iter) {
227  for (LO color = 1; color <= numColors; color++) {
228  Kokkos::deep_copy(aggWeight, 0);
229 
230  // the reduce counts how many nodes are aggregated by this phase,
231  // which will then be subtracted from numNonAggregatedNodes
232  LO numAggregated = 0;
233  Kokkos::parallel_for(
234  "Aggregation Phase 2b: updating agg weights",
235  Kokkos::RangePolicy<execution_space>(0, numRows),
236  KOKKOS_LAMBDA(const LO i) {
237  if (aggStat(i) != READY || colors(i) != color)
238  return;
239  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
240  for (int j = 0; j < neighOfINode.length; j++) {
241  LO neigh = neighOfINode(j);
242  // We don't check (neigh != i), as it is covered by checking
243  // (aggStat[neigh] == AGGREGATED)
244  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
245  aggStat(neigh) == AGGREGATED)
246  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
247  connectWeight(neigh));
248  }
249  });
250 
251  Kokkos::parallel_reduce(
252  "Aggregation Phase 2b: aggregates expansion",
253  Kokkos::RangePolicy<execution_space>(0, numRows),
254  KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) {
255  if (aggStat(i) != READY || colors(i) != color)
256  return;
257  int bestScore = -100000;
258  int bestAggId = -1;
259  int bestConnect = -1;
260 
261  auto neighOfINode = lclLWGraph.getNeighborVertices(i);
262  for (int j = 0; j < neighOfINode.length; j++) {
263  LO neigh = neighOfINode(j);
264 
265  if (lclLWGraph.isLocalNeighborVertex(neigh) &&
266  aggStat(neigh) == AGGREGATED) {
267  auto aggId = vertex2AggId(neigh, 0);
268  int score = aggWeight(aggId) - aggPenalties(aggId);
269 
270  if (score > bestScore) {
271  bestAggId = aggId;
272  bestScore = score;
273  bestConnect = connectWeight(neigh);
274 
275  } else if (aggId == bestAggId &&
276  connectWeight(neigh) > bestConnect) {
277  bestConnect = connectWeight(neigh);
278  }
279  }
280  }
281  if (bestScore >= 0) {
282  aggStat(i) = AGGREGATED;
283  vertex2AggId(i, 0) = bestAggId;
284  procWinner(i, 0) = myRank;
285 
286  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
287  connectWeight(i) = bestConnect - penaltyConnectWeight;
288  tmpNumAggregated++;
289  }
290  },
291  numAggregated); // parallel_reduce
292 
293  Kokkos::parallel_for(
294  "Aggregation Phase 2b: updating agg penalties",
295  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
296  KOKKOS_LAMBDA(const LO agg) {
297  aggPenalties(agg) += aggPenaltyUpdates(agg);
298  aggPenaltyUpdates(agg) = 0;
299  });
300  numNonAggregatedNodes -= numAggregated;
301  }
302  } // loop over k
303 } // BuildAggregatesDeterministic
304 } // namespace MueLu
305 
306 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void BuildAggregatesRandom(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
TransListIter iter
void BuildAggregatesDeterministic(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
const RCP< const Teuchos::Comm< int > > GetComm() const