MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
53 
54 #include <Xpetra_Vector.hpp>
55 
57 
58 #include "MueLu_Aggregates_kokkos.hpp"
59 #include "MueLu_Exceptions.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_Monitor.hpp"
62 
63 namespace MueLu {
64 
65  // Try to stick unaggregated nodes into a neighboring aggregate if they are
66  // not already too big
67  template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69  BuildAggregates(const ParameterList& params,
70  const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74 
75  if(params.get<bool>("aggregation: deterministic")) {
76  Monitor m(*this, "BuildAggregatesDeterministic");
77  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  } else {
79  Monitor m(*this, "BuildAggregatesRandom");
80  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81  }
82 
83  } // BuildAggregates
84 
85  template <class LO, class GO, class Node>
86  void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87  BuildAggregatesRandom(const ParameterList& params,
88  const LWGraph_kokkos& graph,
89  Aggregates_kokkos& aggregates,
91  LO& numNonAggregatedNodes) const {
92 
93  const LO numRows = graph.GetNodeNumVertices();
94  const int myRank = graph.GetComm()->getRank();
95 
96  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
97  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
98  auto colors = aggregates.GetGraphColors();
99  const LO numColors = aggregates.GetGraphNumColors();
100  const LO numLocalAggregates = aggregates.GetNumAggregates();
101 
102  const LO defaultConnectWeight = 100;
103  const LO penaltyConnectWeight = 10;
104 
105  Kokkos::View<LO*, memory_space> aggWeight ("aggWeight", numLocalAggregates);
106  Kokkos::View<LO*, memory_space> connectWeight("connectWeight", numRows);
107  Kokkos::View<LO*, memory_space> aggPenalties ("aggPenalties", numLocalAggregates);
108 
109  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
110 
111  // taw: by running the aggregation routine more than once there is a chance that also
112  // non-aggregated nodes with a node distance of two are added to existing aggregates.
113  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
114  // should be sufficient.
115  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
116  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
117  // is needed to reach distance 2 neighbors.
118  int maxIters = 2;
119  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
120  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121  for (int iter = 0; iter < maxIters; ++iter) {
122  for(LO color = 1; color <= numColors; ++color) {
123  Kokkos::deep_copy(aggWeight, 0);
124 
125  //the reduce counts how many nodes are aggregated by this phase,
126  //which will then be subtracted from numNonAggregatedNodes
127  LO numAggregated = 0;
128  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
130  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) {
131  if (aggStat(i) != READY || colors(i) != color)
132  return;
133 
134  auto neighOfINode = graph.getNeighborVertices(i);
135  for (int j = 0; j < neighOfINode.length; j++) {
136  LO neigh = neighOfINode(j);
137 
138  // We don't check (neigh != i), as it is covered by checking
139  // (aggStat[neigh] == AGGREGATED)
140  if (graph.isLocalNeighborVertex(neigh) &&
141  aggStat(neigh) == AGGREGATED)
142  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143  connectWeight(neigh));
144  }
145 
146  int bestScore = -100000;
147  int bestAggId = -1;
148  int bestConnect = -1;
149 
150  for (int j = 0; j < neighOfINode.length; j++) {
151  LO neigh = neighOfINode(j);
152 
153  if (graph.isLocalNeighborVertex(neigh) &&
154  aggStat(neigh) == AGGREGATED) {
155  auto aggId = vertex2AggId(neigh, 0);
156  int score = aggWeight(aggId) - aggPenalties(aggId);
157 
158  if (score > bestScore) {
159  bestAggId = aggId;
160  bestScore = score;
161  bestConnect = connectWeight(neigh);
162 
163  } else if (aggId == bestAggId &&
164  connectWeight(neigh) > bestConnect) {
165  bestConnect = connectWeight(neigh);
166  }
167  }
168  }
169  if (bestScore >= 0) {
170  aggStat(i) = AGGREGATED;
171  vertex2AggId(i, 0) = bestAggId;
172  procWinner(i, 0) = myRank;
173 
174  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
175  connectWeight(i) = bestConnect - penaltyConnectWeight;
176  tmpNumAggregated++;
177  }
178  }, numAggregated); //parallel_for
179  numNonAggregatedNodes -= numAggregated;
180  }
181  } // loop over maxIters
182 
183  } // BuildAggregatesRandom
184 
185 
186 
187  template <class LO, class GO, class Node>
188  void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
189  BuildAggregatesDeterministic(const ParameterList& params,
190  const LWGraph_kokkos& graph,
191  Aggregates_kokkos& aggregates,
193  LO& numNonAggregatedNodes) const {
194 
195  const LO numRows = graph.GetNodeNumVertices();
196  const int myRank = graph.GetComm()->getRank();
197 
198  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
199  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
200  auto colors = aggregates.GetGraphColors();
201  const LO numColors = aggregates.GetGraphNumColors();
202  LO numLocalAggregates = aggregates.GetNumAggregates();
203 
204  const int defaultConnectWeight = 100;
205  const int penaltyConnectWeight = 10;
206 
207  Kokkos::View<int*, memory_space> connectWeight ("connectWeight", numRows);
208  Kokkos::View<int*, memory_space> aggWeight ("aggWeight", numLocalAggregates);
209  Kokkos::View<int*, memory_space> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
210  Kokkos::View<int*, memory_space> aggPenalties ("aggPenalties", numLocalAggregates);
211 
212  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
213 
214  // We do this cycle twice.
215  // I don't know why, but ML does it too
216  // taw: by running the aggregation routine more than once there is a chance that also
217  // non-aggregated nodes with a node distance of two are added to existing aggregates.
218  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
219  // should be sufficient.
220  int maxIters = 2;
221  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
222  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
223  for (int iter = 0; iter < maxIters; ++iter) {
224  for(LO color = 1; color <= numColors; color++) {
225  Kokkos::deep_copy(aggWeight, 0);
226 
227  //the reduce counts how many nodes are aggregated by this phase,
228  //which will then be subtracted from numNonAggregatedNodes
229  LO numAggregated = 0;
230  Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights",
232  KOKKOS_LAMBDA (const LO i)
233  {
234  if (aggStat(i) != READY || colors(i) != color)
235  return;
236  auto neighOfINode = graph.getNeighborVertices(i);
237  for (int j = 0; j < neighOfINode.length; j++) {
238  LO neigh = neighOfINode(j);
239  // We don't check (neigh != i), as it is covered by checking
240  // (aggStat[neigh] == AGGREGATED)
241  if (graph.isLocalNeighborVertex(neigh) &&
242  aggStat(neigh) == AGGREGATED)
243  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
244  connectWeight(neigh));
245  }
246  });
247  execution_space().fence();
248  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
250  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated)
251  {
252  if (aggStat(i) != READY || colors(i) != color)
253  return;
254  int bestScore = -100000;
255  int bestAggId = -1;
256  int bestConnect = -1;
257 
258  auto neighOfINode = graph.getNeighborVertices(i);
259  for (int j = 0; j < neighOfINode.length; j++) {
260  LO neigh = neighOfINode(j);
261 
262  if (graph.isLocalNeighborVertex(neigh) &&
263  aggStat(neigh) == AGGREGATED) {
264  auto aggId = vertex2AggId(neigh, 0);
265  int score = aggWeight(aggId) - aggPenalties(aggId);
266 
267  if (score > bestScore) {
268  bestAggId = aggId;
269  bestScore = score;
270  bestConnect = connectWeight(neigh);
271 
272  } else if (aggId == bestAggId &&
273  connectWeight(neigh) > bestConnect) {
274  bestConnect = connectWeight(neigh);
275  }
276  }
277  }
278  if (bestScore >= 0) {
279  aggStat(i) = AGGREGATED;
280  vertex2AggId(i, 0) = bestAggId;
281  procWinner(i, 0) = myRank;
282 
283  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
284  connectWeight(i) = bestConnect - penaltyConnectWeight;
285  tmpNumAggregated++;
286  }
287  }, numAggregated); //parallel_reduce
288  execution_space().fence();
289  Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties",
290  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
291  KOKKOS_LAMBDA (const LO agg)
292  {
293  aggPenalties(agg) += aggPenaltyUpdates(agg);
294  aggPenaltyUpdates(agg) = 0;
295  });
296  numNonAggregatedNodes -= numAggregated;
297  }
298  } // loop over k
299  } // BuildAggregatesDeterministic
300 } // end namespace
301 
302 #endif // HAVE_MUELU_KOKKOS_REFACTOR
303 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
TransListIter iter
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)