MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
53 
54 #include <Xpetra_Vector.hpp>
55 
57 
58 #include "MueLu_Aggregates_kokkos.hpp"
59 #include "MueLu_Exceptions.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_Monitor.hpp"
62 
63 #include "Kokkos_Sort.hpp"
64 
65 namespace MueLu {
66 
67  template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69  BuildAggregates(const ParameterList& params,
70  const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74 
75  if(params.get<bool>("aggregation: deterministic")) {
76  Monitor m(*this, "BuildAggregatesDeterministic");
77  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  } else {
79  Monitor m(*this, "BuildAggregatesRandom");
80  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81  }
82 
83  } // BuildAggregates
84 
85  template <class LO, class GO, class Node>
86  void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87  BuildAggregatesRandom(const ParameterList& params,
88  const LWGraph_kokkos& graph,
89  Aggregates_kokkos& aggregates,
91  LO& numNonAggregatedNodes) const
92  {
93  using memory_space = typename LWGraph_kokkos::memory_space;
94  using execution_space = typename LWGraph_kokkos::execution_space;
95 
96  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
97  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
98 
99  const LO numRows = graph.GetNodeNumVertices();
100  const int myRank = graph.GetComm()->getRank();
101 
102  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
103  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
104  auto colors = aggregates.GetGraphColors();
105  const LO numColors = aggregates.GetGraphNumColors();
106 
107  LO numLocalNodes = numRows;
108  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
109 
110  const double aggFactor = 0.5;
111  double factor = static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
112  factor = pow(factor, aggFactor);
113 
114  // LBV on Sept 12, 2019: this looks a little heavy handed,
115  // I'm not sure a view is needed to perform atomic updates.
116  // If we can avoid this and use a simple LO that would be
117  // simpler for later maintenance.
118  Kokkos::View<LO, memory_space> numLocalAggregates("numLocalAggregates");
119  typename Kokkos::View<LO, memory_space>::HostMirror h_numLocalAggregates =
120  Kokkos::create_mirror_view(numLocalAggregates);
121  h_numLocalAggregates() = aggregates.GetNumAggregates();
122  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
123 
124  // Now we create new aggregates using root nodes in all colors other than the first color,
125  // as the first color was already exhausted in Phase 1.
126  for(int color = 2; color < numColors + 1; ++color) {
127  LO tmpNumNonAggregatedNodes = 0;
128  Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color",
130  KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) {
131  if(aggStat(rootCandidate) == READY &&
132  colors(rootCandidate) == color) {
133 
134  LO aggSize = 0;
135  auto neighbors = graph.getNeighborVertices(rootCandidate);
136 
137  // Loop over neighbors to count how many nodes could join
138  // the new aggregate
139  LO numNeighbors = 0;
140  for(int j = 0; j < neighbors.length; ++j) {
141  LO neigh = neighbors(j);
142  if(neigh != rootCandidate) {
143  if(graph.isLocalNeighborVertex(neigh) &&
144  aggStat(neigh) == READY &&
145  aggSize < maxNodesPerAggregate) {
146  // aggList(aggSize) = neigh;
147  ++aggSize;
148  }
149  ++numNeighbors;
150  }
151  }
152 
153  // If a sufficient number of nodes can join the new aggregate
154  // then we actually create the aggregate.
155  if(aggSize > minNodesPerAggregate &&
156  aggSize > factor*numNeighbors) {
157 
158  // aggregates.SetIsRoot(rootCandidate);
159  LO aggIndex = Kokkos::
160  atomic_fetch_add(&numLocalAggregates(), 1);
161 
162  for(int j = 0; j < neighbors.length; ++j) {
163  LO neigh = neighbors(j);
164  if(neigh != rootCandidate) {
165  if(graph.isLocalNeighborVertex(neigh) &&
166  aggStat(neigh) == READY &&
167  aggSize < maxNodesPerAggregate) {
168  aggStat(neigh) = AGGREGATED;
169  vertex2AggId(neigh, 0) = aggIndex;
170  procWinner(neigh, 0) = myRank;
171  }
172  }
173  }
174  lNumNonAggregatedNodes -= aggSize;
175  }
176  }
177  }, tmpNumNonAggregatedNodes);
178  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
179  }
180 
181  // update aggregate object
182  Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
183  aggregates.SetNumAggregates(h_numLocalAggregates());
184  } // BuildAggregatesRandom
185 
186  template <class LO, class GO, class Node>
187  void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
188  BuildAggregatesDeterministic(const ParameterList& params,
189  const LWGraph_kokkos& graph,
190  Aggregates_kokkos& aggregates,
192  LO& numNonAggregatedNodes) const
193  {
194  using memory_space = typename LWGraph_kokkos::memory_space;
195  using execution_space = typename LWGraph_kokkos::execution_space;
196 
197  const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
198  const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
199 
200  const LO numRows = graph.GetNodeNumVertices();
201  const int myRank = graph.GetComm()->getRank();
202 
203  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
204  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
205  auto colors = aggregates.GetGraphColors();
206  const LO numColors = aggregates.GetGraphNumColors();
207 
208  LO numLocalNodes = procWinner.size();
209  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
210 
211  const double aggFactor = 0.5;
212  double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
213  factor = pow(factor, aggFactor);
214 
215  Kokkos::View<LO, memory_space> numLocalAggregates("numLocalAggregates");
216  typename Kokkos::View<LO, memory_space>::HostMirror h_numLocalAggregates =
217  Kokkos::create_mirror_view(numLocalAggregates);
218  h_numLocalAggregates() = aggregates.GetNumAggregates();
219  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
220 
221  // Now we create new aggregates using root nodes in all colors other than the first color,
222  // as the first color was already exhausted in Phase 1.
223  //
224  // In the deterministic version, exactly the same set of aggregates will be created
225  // (as the nondeterministic version)
226  // because no vertex V can be a neighbor of two vertices of the same color, so two root
227  // candidates can't fight over V
228  //
229  // But, the precise values in vertex2AggId need to match exactly, so just sort the new
230  // roots of each color before assigning aggregate IDs
231 
232  //numNonAggregatedNodes is the best available upper bound for the number of aggregates
233  //which may be created in this phase, so use it for the size of newRoots
234  Kokkos::View<LO*, memory_space> newRoots("New root LIDs", numNonAggregatedNodes);
235  Kokkos::View<LO, memory_space> numNewRoots("Number of new aggregates of current color");
236  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
237  for(int color = 1; color < numColors + 1; ++color) {
238  h_numNewRoots() = 0;
239  Kokkos::deep_copy(numNewRoots, h_numNewRoots);
240  Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color",
242  KOKKOS_LAMBDA(const LO rootCandidate) {
243  if(aggStat(rootCandidate) == READY &&
244  colors(rootCandidate) == color) {
245  LO aggSize = 0;
246  auto neighbors = graph.getNeighborVertices(rootCandidate);
247  // Loop over neighbors to count how many nodes could join
248  // the new aggregate
249  LO numNeighbors = 0;
250  for(int j = 0; j < neighbors.length; ++j) {
251  LO neigh = neighbors(j);
252  if(neigh != rootCandidate)
253  {
254  if(graph.isLocalNeighborVertex(neigh) &&
255  aggStat(neigh) == READY &&
256  aggSize < maxNodesPerAggregate)
257  {
258  ++aggSize;
259  }
260  ++numNeighbors;
261  }
262  }
263  // If a sufficient number of nodes can join the new aggregate
264  // then we mark rootCandidate as a future root.
265  if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
266  LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
267  newRoots(newRootIndex) = rootCandidate;
268  }
269  }
270  });
271  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
272 
273  if(h_numNewRoots() > 0) {
274  //sort the new root indices
275  Kokkos::sort(newRoots, 0, h_numNewRoots());
276  //now, loop over all new roots again and actually create the aggregates
277  LO tmpNumNonAggregatedNodes = 0;
278  //First, just find the set of color vertices which will become aggregate roots
279  Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates",
280  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
281  KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) {
282  LO root = newRoots(newRootIndex);
283  LO newAggID = numLocalAggregates() + newRootIndex;
284  auto neighbors = graph.getNeighborVertices(root);
285  // Loop over neighbors and add them to new aggregate
286  aggStat(root) = AGGREGATED;
287  vertex2AggId(root, 0) = newAggID;
288  LO aggSize = 1;
289  for(int j = 0; j < neighbors.length; ++j) {
290  LO neigh = neighbors(j);
291  if(neigh != root) {
292  if(graph.isLocalNeighborVertex(neigh) &&
293  aggStat(neigh) == READY &&
294  aggSize < maxNodesPerAggregate) {
295  aggStat(neigh) = AGGREGATED;
296  vertex2AggId(neigh, 0) = newAggID;
297  procWinner(neigh, 0) = myRank;
298  aggSize++;
299  }
300  }
301  }
302  lNumNonAggregatedNodes -= aggSize;
303  }, tmpNumNonAggregatedNodes);
304  numNonAggregatedNodes += tmpNumNonAggregatedNodes;
305  h_numLocalAggregates() += h_numNewRoots();
306  Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
307  }
308  }
309  aggregates.SetNumAggregates(h_numLocalAggregates());
310  }
311 
312 } // end namespace
313 
314 #endif // HAVE_MUELU_KOKKOS_REFACTOR
315 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > pow(const complex< RealType > &x, const RealType &e)
LocalOrdinal LO
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)