MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <queue>
52 #include <vector>
53 
54 #include <Teuchos_Comm.hpp>
55 #include <Teuchos_CommHelpers.hpp>
56 
57 #include <Xpetra_Vector.hpp>
58 
60 
61 #include "MueLu_Aggregates_kokkos.hpp"
62 #include "MueLu_Exceptions.hpp"
63 #include "MueLu_LWGraph_kokkos.hpp"
64 #include "MueLu_Monitor.hpp"
65 
66 #include "Kokkos_Sort.hpp"
67 #include <Kokkos_ScatterView.hpp>
68 
69 namespace MueLu {
70 
71  template <class LocalOrdinal, class GlobalOrdinal, class Node>
72  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
73  BuildAggregates(const Teuchos::ParameterList& params,
74  const LWGraph_kokkos& graph,
75  Aggregates_kokkos& aggregates,
77  LO& numNonAggregatedNodes) const {
78 
79  using memory_space = typename LWGraph_kokkos::memory_space;
80 
81  std::string orderingStr = params.get<std::string>("aggregation: ordering");
82  int maxNeighAlreadySelected = params.get<int> ("aggregation: max selected neighbors");
83  int minNodesPerAggregate = params.get<int> ("aggregation: min agg size");
84  int maxNodesPerAggregate = params.get<int> ("aggregation: max agg size");
85 
86  Algorithm algorithm = Algorithm::Serial;
87  std::string algoParamName = "aggregation: phase 1 algorithm";
88  if(params.isParameter(algoParamName))
89  {
90  algorithm = algorithmFromName(params.get<std::string>(algoParamName));
91  }
92 
93  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
94  Exceptions::RuntimeError,
95  "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
96 
97  //Distance-2 gives less control than serial uncoupled phase 1
98  //no custom row reordering because would require making deep copy of local matrix entries and permuting it
99  //can only enforce max aggregate size
100  if(algorithm == Algorithm::Distance2)
101  {
102  if(params.get<bool>("aggregation: deterministic"))
103  {
104  Monitor m(*this, "BuildAggregatesDeterministic");
105  BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
106  aggregates, aggStat, numNonAggregatedNodes);
107  } else {
108  Monitor m(*this, "BuildAggregatesRandom");
109  BuildAggregatesDistance2(maxNodesPerAggregate, graph,
110  aggregates, aggStat, numNonAggregatedNodes);
111  }
112  }
113  else
114  {
115  Monitor m(*this, "BuildAggregatesSerial");
117  = Kokkos::create_mirror(aggStat);
118  Kokkos::deep_copy(aggStatHost, aggStat);
119  std::vector<unsigned> aggstat;
120  aggstat.resize(aggStatHost.extent(0));
121  for(size_t idx = 0; idx < aggStatHost.extent(0); ++idx) {
122  aggstat[idx] = aggStatHost(idx);
123  }
124 
125  BuildAggregatesSerial(graph, aggregates, aggstat, numNonAggregatedNodes, minNodesPerAggregate,
126  maxNodesPerAggregate, maxNeighAlreadySelected, orderingStr);
127 
128  for(size_t idx = 0; idx < aggStatHost.extent(0); ++idx) {
129  aggStatHost(idx) = aggstat[idx];
130  }
131  Kokkos::deep_copy(aggStat, aggStatHost);
132  }
133  }
134 
135 
136  template <class LocalOrdinal, class GlobalOrdinal, class Node>
137  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
138  BuildAggregatesSerial(const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
139  std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes,
140  LO minNodesPerAggregate, LO maxNodesPerAggregate,
141  LO maxNeighAlreadySelected, std::string& orderingStr) const
142  {
143  enum {
144  O_NATURAL,
145  O_RANDOM,
146  O_GRAPH
147  } ordering;
148 
149  ordering = O_NATURAL; // initialize variable (fix CID 143665)
150  if (orderingStr == "natural") ordering = O_NATURAL;
151  if (orderingStr == "random" ) ordering = O_RANDOM;
152  if (orderingStr == "graph" ) ordering = O_GRAPH;
153 
154  const LO numRows = graph.GetNodeNumVertices();
155  const int myRank = graph.GetComm()->getRank();
156 
157  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
158  ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
159 
160  LO numLocalAggregates = aggregates.GetNumAggregates();
161 
162  ArrayRCP<LO> randomVector;
163  if (ordering == O_RANDOM) {
164  randomVector = arcp<LO>(numRows);
165  for (LO i = 0; i < numRows; i++)
166  randomVector[i] = i;
167  RandomReorder(randomVector);
168  }
169 
170  int aggIndex = -1;
171  size_t aggSize = 0;
172  std::vector<int> aggList(graph.getNodeMaxNumRowEntries());
173 
174  std::queue<LO> graphOrderQueue;
175 
176  // Main loop over all local rows of graph(A)
177  for (LO i = 0; i < numRows; i++) {
178  // Step 1: pick the next node to aggregate
179  LO rootCandidate = 0;
180  if (ordering == O_NATURAL) rootCandidate = i;
181  else if (ordering == O_RANDOM) rootCandidate = randomVector[i];
182  else if (ordering == O_GRAPH) {
183 
184  if (graphOrderQueue.size() == 0) {
185  // Current queue is empty for "graph" ordering, populate with one READY node
186  for (LO jnode = 0; jnode < numRows; jnode++)
187  if (aggStat[jnode] == READY) {
188  graphOrderQueue.push(jnode);
189  break;
190  }
191  }
192  if (graphOrderQueue.size() == 0) {
193  // There are no more ready nodes, end the phase
194  break;
195  }
196  rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue
197  graphOrderQueue.pop(); // delete this node in list
198  }
199 
200  if (aggStat[rootCandidate] != READY)
201  continue;
202 
203  // Step 2: build tentative aggregate
204  aggSize = 0;
205  aggList[aggSize++] = rootCandidate;
206 
207  auto neighOfINode = graph.getNeighborVertices(rootCandidate);
208 
209  // If the number of neighbors is less than the minimum number of nodes
210  // per aggregate, we know this is not going to be a valid root, and we
211  // may skip it, but only for "natural" and "random" (for "graph" we still
212  // need to fetch the list of local neighbors to continue)
213  if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
214  as<int>(neighOfINode.length) < minNodesPerAggregate) {
215  continue;
216  }
217 
218  LO numAggregatedNeighbours = 0;
219 
220  for (int j = 0; j < as<int>(neighOfINode.length); j++) {
221  LO neigh = neighOfINode(j);
222 
223  if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
224 
225  if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) {
226  // If aggregate size does not exceed max size, add node to the
227  // tentative aggregate
228  // NOTE: We do not exit the loop over all neighbours since we have
229  // still to count all aggregated neighbour nodes for the
230  // aggregation criteria
231  // NOTE: We check here for the maximum aggregation size. If we
232  // would do it below with all the other check too big aggregates
233  // would not be accepted at all.
234  if (aggSize < as<size_t>(maxNodesPerAggregate))
235  aggList[aggSize++] = neigh;
236 
237  } else {
238  numAggregatedNeighbours++;
239  }
240  }
241  }
242 
243  // Step 3: check if tentative aggregate is acceptable
244  if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates
245  (aggSize >= as<size_t>(minNodesPerAggregate))) { // too few nodes in the tentative aggregate
246  // Accept new aggregate
247  // rootCandidate becomes the root of the newly formed aggregate
248  aggregates.SetIsRoot(rootCandidate);
249  aggIndex = numLocalAggregates++;
250 
251  for (size_t k = 0; k < aggSize; k++) {
252  aggStat [aggList[k]] = AGGREGATED;
253  vertex2AggId[aggList[k]] = aggIndex;
254  procWinner [aggList[k]] = myRank;
255  }
256 
257  numNonAggregatedNodes -= aggSize;
258 
259  } else {
260  // Aggregate is not accepted
261  aggStat[rootCandidate] = NOTSEL;
262 
263  // Need this for the "graph" ordering below
264  // The original candidate is always aggList[0]
265  aggSize = 1;
266  }
267 
268  if (ordering == O_GRAPH) {
269  // Add candidates to the list of nodes
270  // NOTE: the code have slightly different meanings depending on context:
271  // - if aggregate was accepted, we add neighbors of neighbors of the original candidate
272  // - if aggregate was not accepted, we add neighbors of the original candidate
273  for (size_t k = 0; k < aggSize; k++) {
274  auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
275 
276  for (int j = 0; j < as<int>(neighOfJNode.length); j++) {
277  LO neigh = neighOfJNode(j);
278 
279  if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY)
280  graphOrderQueue.push(neigh);
281  }
282  }
283  }
284  }
285 
286  // Reset all NOTSEL vertices to READY
287  // This simplifies other algorithms
288  for (LO i = 0; i < numRows; i++)
289  if (aggStat[i] == NOTSEL)
290  aggStat[i] = READY;
291 
292  // update aggregate object
293  aggregates.SetNumAggregates(numLocalAggregates);
294  }
295 
296  template <class LocalOrdinal, class GlobalOrdinal, class Node>
297  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
298  BuildAggregatesDistance2(const LO maxAggSize,
299  const LWGraph_kokkos& graph,
300  Aggregates_kokkos& aggregates,
302  LO& numNonAggregatedNodes) const
303  {
304  using memory_space = typename LWGraph_kokkos::memory_space;
305  using execution_space = typename LWGraph_kokkos::execution_space;
306 
307  const LO numRows = graph.GetNodeNumVertices();
308  const int myRank = graph.GetComm()->getRank();
309 
310  // Extract data from aggregates
311  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
312  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
313  auto colors = aggregates.GetGraphColors();
314 
315  LO numLocalAggregates = aggregates.GetNumAggregates();
316  Kokkos::View<LO, memory_space> aggCount("aggCount");
317  LO tmpNumLocalAggregates = 0;
318  Kokkos::parallel_reduce("Aggregation Phase 1: initial reduction over color == 1",
320  KOKKOS_LAMBDA (const LO i, LO& lnumLocalAggregates) {
321  if(colors(i) == 1 && aggStat(i) == READY) {
322  const LO idx = Kokkos::atomic_fetch_add (&aggCount(), 1);
323  vertex2AggId(i, 0) = idx;
324  aggStat(i) = AGGREGATED;
325  ++lnumLocalAggregates;
326  procWinner(i, 0) = myRank;
327  }
328  }, tmpNumLocalAggregates);
329  numLocalAggregates += tmpNumLocalAggregates;
330 
331  // Compute the initial size of the aggregates.
332  // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
333  // at this point so we could simplify the code below a lot if this
334  // assumption is correct...
335  Kokkos::View<LO*, memory_space> aggSizesView("aggSizes", numLocalAggregates);
336  {
337  // Here there is a possibility that two vertices assigned to two different threads contribute
338  // to the same aggregate if somethings happened before phase 1?
339  auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
340  Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size",
342  KOKKOS_LAMBDA (const LO i) {
343  auto aggSizesScatterViewAccess = aggSizesScatterView.access();
344  if(vertex2AggId(i, 0) >= 0)
345  aggSizesScatterViewAccess(vertex2AggId(i, 0)) += 1;
346  });
347  Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
348  }
349 
350  Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes",
352  KOKKOS_LAMBDA (const size_t i, LO & lNumNonAggregatedNodes) {
353  if(colors(i) != 1
354  && (aggStat(i) == READY || aggStat(i) == NOTSEL)) {
355  // Get neighbors of vertex i and look for local, aggregated,
356  // color 1 neighbor (valid root).
357  auto neighbors = graph.getNeighborVertices(i);
358  for(LO j = 0; j < neighbors.length; ++j) {
359  auto nei = neighbors.colidx(j);
360  if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1
361  && aggStat(nei) == AGGREGATED) {
362 
363  // This atomic guarentees that any other node trying to
364  // join aggregate agg has the correct size.
365  LO agg = vertex2AggId(nei, 0);
366  const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
367  1);
368  if(aggSize < maxAggSize) {
369  //assign vertex i to aggregate with root j
370  vertex2AggId(i, 0) = agg;
371  procWinner(i, 0) = myRank;
372  aggStat(i) = AGGREGATED;
373  break;
374  } else {
375  // Decrement back the value of aggSizesView(agg)
376  Kokkos::atomic_decrement(&aggSizesView(agg));
377  }
378  }
379  }
380  }
381  if(aggStat(i) != AGGREGATED) {
382  lNumNonAggregatedNodes++;
383  if(aggStat(i) == NOTSEL) { aggStat(i) = READY; }
384  }
385  }, numNonAggregatedNodes);
386 
387  // update aggregate object
388  aggregates.SetNumAggregates(numLocalAggregates);
389  }
390 
391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
392  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
393  BuildAggregatesDeterministic(const LO maxAggSize,
394  const LWGraph_kokkos& graph,
395  Aggregates_kokkos& aggregates,
397  LO& numNonAggregatedNodes) const
398  {
399  using graph_t = typename LWGraph_kokkos::local_graph_type;
400  using memory_space = typename graph_t::device_type::memory_space;
401  using execution_space = typename graph_t::device_type::execution_space;
402 
403  const LO numRows = graph.GetNodeNumVertices();
404  const int myRank = graph.GetComm()->getRank();
405 
406  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
407  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
408  auto colors = aggregates.GetGraphColors();
409 
410  LO numLocalAggregates = aggregates.GetNumAggregates();
411  Kokkos::View<LO, memory_space> numLocalAggregatesView("Num aggregates");
412  {
413  auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
414  h_nla() = numLocalAggregates;
415  Kokkos::deep_copy(numLocalAggregatesView, h_nla);
416  }
417 
418  Kokkos::View<LO*, memory_space> newRoots("New root LIDs", numNonAggregatedNodes);
419  Kokkos::View<LO, memory_space> numNewRoots("Number of new aggregates of current color");
420  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
421 
422  //first loop build the set of new roots
423  Kokkos::parallel_for("Aggregation Phase 1: building list of new roots",
425  KOKKOS_LAMBDA(const LO i)
426  {
427  if(colors(i) == 1 && aggStat(i) == READY)
428  {
429  //i will become a root
430  newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
431  }
432  });
433  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
434  //sort new roots by LID to guarantee determinism in agg IDs
435  Kokkos::sort(newRoots, 0, h_numNewRoots());
436  LO numAggregated = 0;
437  Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes",
438  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
439  KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated)
440  {
441  LO root = newRoots(rootIndex);
442  LO aggID = numLocalAggregatesView() + rootIndex;
443  LO aggSize = 1;
444  vertex2AggId(root, 0) = aggID;
445  procWinner(root, 0) = myRank;
446  aggStat(root) = AGGREGATED;
447  auto neighOfRoot = graph.getNeighborVertices(root);
448  for(LO n = 0; n < neighOfRoot.length; n++)
449  {
450  LO neigh = neighOfRoot(n);
451  if (graph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY)
452  {
453  //add neigh to aggregate
454  vertex2AggId(neigh, 0) = aggID;
455  procWinner(neigh, 0) = myRank;
456  aggStat(neigh) = AGGREGATED;
457  aggSize++;
458  if(aggSize == maxAggSize)
459  {
460  //can't add any more nodes
461  break;
462  }
463  }
464  }
465  lnumAggregated += aggSize;
466  }, numAggregated);
467  numNonAggregatedNodes -= numAggregated;
468  // update aggregate object
469  aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
470  }
471 
472  template <class LocalOrdinal, class GlobalOrdinal, class Node>
473  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomReorder(ArrayRCP<LO> list) const {
474  //TODO: replace int
475  int n = list.size();
476  for(int i = 0; i < n-1; i++)
477  std::swap(list[i], list[RandomOrdinal(i,n-1)]);
478  }
479 
480  template <class LocalOrdinal, class GlobalOrdinal, class Node>
481  int AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomOrdinal(int min, int max) const {
482  return min + as<int>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
483  }
484 
485 } // end namespace
486 
487 #endif // HAVE_MUELU_KOKKOS_REFACTOR
488 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
T & get(const std::string &name, T def_value)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
bool isParameter(const std::string &name) const
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept