MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
53 
54 #include <Xpetra_Vector.hpp>
55 
57 
58 #include "MueLu_Aggregates_kokkos.hpp"
59 #include "MueLu_Exceptions.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_Monitor.hpp"
62 
63 #include "Kokkos_Sort.hpp"
64 
65 namespace MueLu {
66 
67  template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69  BuildAggregates(const ParameterList& params,
70  const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74 
75  if(params.get<bool>("aggregation: deterministic")) {
76  Monitor m(*this, "BuildAggregatesDeterministic");
77  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  } else {
79  Monitor m(*this, "BuildAggregatesRandom");
80  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81  }
82 
83  } // BuildAggregates
84 
85  template <class LO, class GO, class Node>
86  void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87  BuildAggregatesRandom(const ParameterList& params,
88  const LWGraph_kokkos& graph,
89  Aggregates_kokkos& aggregates,
91  LO& numNonAggregatedNodes) const
92  {
93  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
94  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
95  bool includeRootInAgg = params.get<bool>("aggregation: phase2a include root");
96 
97  const LO numRows = graph.GetNodeNumVertices();
98  const int myRank = graph.GetComm()->getRank();
99 
100  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
101  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
102  auto colors = aggregates.GetGraphColors();
103  const LO numColors = aggregates.GetGraphNumColors();
104 
105  LO numLocalNodes = numRows;
106  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
107 
108  const double aggFactor = 0.5;
109  double factor = static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110  factor = pow(factor, aggFactor);
111 
112  // LBV on Sept 12, 2019: this looks a little heavy handed,
113  // I'm not sure a view is needed to perform atomic updates.
114  // If we can avoid this and use a simple LO that would be
115  // simpler for later maintenance.
116  Kokkos::View<LO, memory_space> numLocalAggregates("numLocalAggregates");
117  typename Kokkos::View<LO, memory_space>::HostMirror h_numLocalAggregates =
118  Kokkos::create_mirror_view(numLocalAggregates);
119  h_numLocalAggregates() = aggregates.GetNumAggregates();
120  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
121 
122  // Now we create new aggregates using root nodes in all colors other than the first color,
123  // as the first color was already exhausted in Phase 1.
124  for(int color = 2; color < numColors + 1; ++color) {
125  LO tmpNumNonAggregatedNodes = 0;
126  Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color",
128  KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) {
129  if(aggStat(rootCandidate) == READY &&
130  colors(rootCandidate) == color) {
131 
132  LO aggSize;
133  if (includeRootInAgg)
134  aggSize = 1;
135  else
136  aggSize = 0;
137 
138  auto neighbors = graph.getNeighborVertices(rootCandidate);
139 
140  // Loop over neighbors to count how many nodes could join
141  // the new aggregate
142  LO numNeighbors = 0;
143  for(int j = 0; j < neighbors.length; ++j) {
144  LO neigh = neighbors(j);
145  if(neigh != rootCandidate) {
146  if(graph.isLocalNeighborVertex(neigh) &&
147  (aggStat(neigh) == READY) &&
148  (aggSize < maxNodesPerAggregate)) {
149  ++aggSize;
150  }
151  ++numNeighbors;
152  }
153  }
154 
155  // If a sufficient number of nodes can join the new aggregate
156  // then we actually create the aggregate.
157  if(aggSize > minNodesPerAggregate &&
158  ((includeRootInAgg && aggSize-1 > factor*numNeighbors) ||
159  (!includeRootInAgg && aggSize > factor*numNeighbors))) {
160 
161  // aggregates.SetIsRoot(rootCandidate);
162  LO aggIndex = Kokkos::
163  atomic_fetch_add(&numLocalAggregates(), 1);
164 
165  LO numAggregated = 0;
166 
167  if (includeRootInAgg) {
168  // Add the root.
169  aggStat(rootCandidate) = AGGREGATED;
170  vertex2AggId(rootCandidate, 0) = aggIndex;
171  procWinner(rootCandidate, 0) = myRank;
172  ++numAggregated;
173  --lNumNonAggregatedNodes;
174  }
175 
176  for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177  LO neigh = neighbors(neighIdx);
178  if(neigh != rootCandidate) {
179  if(graph.isLocalNeighborVertex(neigh) &&
180  (aggStat(neigh) == READY) &&
181  (numAggregated < aggSize)) {
182  aggStat(neigh) = AGGREGATED;
183  vertex2AggId(neigh, 0) = aggIndex;
184  procWinner(neigh, 0) = myRank;
185 
186  ++numAggregated;
187  --lNumNonAggregatedNodes;
188  }
189  }
190  }
191  }
192  }
193  }, tmpNumNonAggregatedNodes);
194  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
195  }
196 
197  // update aggregate object
198  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
199  aggregates.SetNumAggregates(h_numLocalAggregates());
200  } // BuildAggregatesRandom
201 
202  template <class LO, class GO, class Node>
203  void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
204  BuildAggregatesDeterministic(const ParameterList& params,
205  const LWGraph_kokkos& graph,
206  Aggregates_kokkos& aggregates,
208  LO& numNonAggregatedNodes) const
209  {
210  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
211  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
212 
213  const LO numRows = graph.GetNodeNumVertices();
214  const int myRank = graph.GetComm()->getRank();
215 
216  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
217  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
218  auto colors = aggregates.GetGraphColors();
219  const LO numColors = aggregates.GetGraphNumColors();
220 
221  LO numLocalNodes = procWinner.size();
222  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
223 
224  const double aggFactor = 0.5;
225  double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
226  factor = pow(factor, aggFactor);
227 
228  Kokkos::View<LO, memory_space> numLocalAggregates("numLocalAggregates");
229  typename Kokkos::View<LO, memory_space>::HostMirror h_numLocalAggregates =
230  Kokkos::create_mirror_view(numLocalAggregates);
231  h_numLocalAggregates() = aggregates.GetNumAggregates();
232  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
233 
234  // Now we create new aggregates using root nodes in all colors other than the first color,
235  // as the first color was already exhausted in Phase 1.
236  //
237  // In the deterministic version, exactly the same set of aggregates will be created
238  // (as the nondeterministic version)
239  // because no vertex V can be a neighbor of two vertices of the same color, so two root
240  // candidates can't fight over V
241  //
242  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
243  // roots of each color before assigning aggregate IDs
244 
245  //numNonAggregatedNodes is the best available upper bound for the number of aggregates
246  //which may be created in this phase, so use it for the size of newRoots
247  Kokkos::View<LO*, memory_space> newRoots("New root LIDs", numNonAggregatedNodes);
248  Kokkos::View<LO, memory_space> numNewRoots("Number of new aggregates of current color");
249  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
250  for(int color = 1; color < numColors + 1; ++color) {
251  h_numNewRoots() = 0;
252  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
253  Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color",
255  KOKKOS_LAMBDA(const LO rootCandidate) {
256  if(aggStat(rootCandidate) == READY &&
257  colors(rootCandidate) == color) {
258  LO aggSize = 0;
259  auto neighbors = graph.getNeighborVertices(rootCandidate);
260  // Loop over neighbors to count how many nodes could join
261  // the new aggregate
262  LO numNeighbors = 0;
263  for(int j = 0; j < neighbors.length; ++j) {
264  LO neigh = neighbors(j);
265  if(neigh != rootCandidate)
266  {
267  if(graph.isLocalNeighborVertex(neigh) &&
268  aggStat(neigh) == READY &&
269  aggSize < maxNodesPerAggregate)
270  {
271  ++aggSize;
272  }
273  ++numNeighbors;
274  }
275  }
276  // If a sufficient number of nodes can join the new aggregate
277  // then we mark rootCandidate as a future root.
278  if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
279  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
280  newRoots(newRootIndex) = rootCandidate;
281  }
282  }
283  });
284  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
285 
286  if(h_numNewRoots() > 0) {
287  //sort the new root indices
288  Kokkos::sort(newRoots, 0, h_numNewRoots());
289  //now, loop over all new roots again and actually create the aggregates
290  LO tmpNumNonAggregatedNodes = 0;
291  //First, just find the set of color vertices which will become aggregate roots
292  Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates",
293  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
294  KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) {
295  LO root = newRoots(newRootIndex);
296  LO newAggID = numLocalAggregates() + newRootIndex;
297  auto neighbors = graph.getNeighborVertices(root);
298  // Loop over neighbors and add them to new aggregate
299  aggStat(root) = AGGREGATED;
300  vertex2AggId(root, 0) = newAggID;
301  LO aggSize = 1;
302  for(int j = 0; j < neighbors.length; ++j) {
303  LO neigh = neighbors(j);
304  if(neigh != root) {
305  if(graph.isLocalNeighborVertex(neigh) &&
306  aggStat(neigh) == READY &&
307  aggSize < maxNodesPerAggregate) {
308  aggStat(neigh) = AGGREGATED;
309  vertex2AggId(neigh, 0) = newAggID;
310  procWinner(neigh, 0) = myRank;
311  aggSize++;
312  }
313  }
314  }
315  lNumNonAggregatedNodes -= aggSize;
316  }, tmpNumNonAggregatedNodes);
317  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
318  h_numLocalAggregates() += h_numNewRoots();
319  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
320  }
321  }
322  aggregates.SetNumAggregates(h_numLocalAggregates());
323  }
324 
325 } // end namespace
326 
327 #endif // HAVE_MUELU_KOKKOS_REFACTOR
328 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > pow(const complex< RealType > &x, const RealType &e)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)