MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48 
49 #include <queue>
50 #include <vector>
51 
52 #include <Teuchos_Comm.hpp>
53 #include <Teuchos_CommHelpers.hpp>
54 
55 #include <Xpetra_Vector.hpp>
56 
58 
59 #include "MueLu_Aggregates.hpp"
60 #include "MueLu_Exceptions.hpp"
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Monitor.hpp"
63 
64 #include "Kokkos_Sort.hpp"
65 #include <Kokkos_ScatterView.hpp>
66 
67 namespace MueLu {
68 
69 template <class LocalOrdinal, class GlobalOrdinal, class Node>
72  const LWGraph_kokkos& graph,
73  Aggregates& aggregates,
74  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
75  LO& numNonAggregatedNodes) const {
76  int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
77  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
78 
79  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
81  "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
82 
83  // Distance-2 gives less control than serial uncoupled phase 1
84  // no custom row reordering because would require making deep copy
85  // of local matrix entries and permuting it can only enforce
86  // max aggregate size
87  {
88  if (params.get<bool>("aggregation: deterministic")) {
89  Monitor m(*this, "BuildAggregatesDeterministic");
90  BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
91  aggregates, aggStat, numNonAggregatedNodes);
92  } else {
93  Monitor m(*this, "BuildAggregatesRandom");
94  BuildAggregatesRandom(maxNodesPerAggregate, graph,
95  aggregates, aggStat, numNonAggregatedNodes);
96  }
97  }
98 }
99 
100 template <class LocalOrdinal, class GlobalOrdinal, class Node>
102  BuildAggregatesRandom(const LO maxAggSize,
103  const LWGraph_kokkos& graph,
104  Aggregates& aggregates,
105  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
106  LO& numNonAggregatedNodes) const {
107  const LO numRows = graph.GetNodeNumVertices();
108  const int myRank = graph.GetComm()->getRank();
109 
110  // Extract data from aggregates
111  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
112  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
113  auto colors = aggregates.GetGraphColors();
114 
115  auto lclLWGraph = graph;
116 
117  LO numAggregatedNodes = 0;
118  LO numLocalAggregates = aggregates.GetNumAggregates();
119  Kokkos::View<LO, device_type> aggCount("aggCount");
120  Kokkos::deep_copy(aggCount, numLocalAggregates);
121  Kokkos::parallel_for(
122  "Aggregation Phase 1: initial reduction over color == 1",
123  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
124  KOKKOS_LAMBDA(const LO nodeIdx) {
125  if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
126  const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
127  vertex2AggId(nodeIdx, 0) = aggIdx;
128  aggStat(nodeIdx) = AGGREGATED;
129  procWinner(nodeIdx, 0) = myRank;
130  }
131  });
132  // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
133  // before updating the value of numLocalAggregates.
134  // But since we also do not want to create a host mirror of aggCount we do some trickery...
135  numAggregatedNodes -= numLocalAggregates;
136  Kokkos::deep_copy(numLocalAggregates, aggCount);
137  numAggregatedNodes += numLocalAggregates;
138 
139  // Compute the initial size of the aggregates.
140  // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
141  // at this point so we could simplify the code below a lot if this
142  // assumption is correct...
143  Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
144  {
145  // Here there is a possibility that two vertices assigned to two different threads contribute
146  // to the same aggregate if somethings happened before phase 1?
147  auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
148  Kokkos::parallel_for(
149  "Aggregation Phase 1: compute initial aggregates size",
150  Kokkos::RangePolicy<LO, execution_space>(0, numRows),
151  KOKKOS_LAMBDA(const LO nodeIdx) {
152  auto aggSizesScatterViewAccess = aggSizesScatterView.access();
153  if (vertex2AggId(nodeIdx, 0) >= 0)
154  aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
155  });
156  Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
157  }
158 
159  LO tmpNumAggregatedNodes = 0;
160  Kokkos::parallel_reduce(
161  "Aggregation Phase 1: main parallel_reduce over aggSizes",
162  Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
163  KOKKOS_LAMBDA(const size_t nodeIdx, LO& lNumAggregatedNodes) {
164  if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
165  // Get neighbors of vertex i and look for local, aggregated,
166  // color 1 neighbor (valid root).
167  auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
168  for (LO j = 0; j < neighbors.length; ++j) {
169  auto nei = neighbors.colidx(j);
170  if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) == AGGREGATED) {
171  // This atomic guarentees that any other node trying to
172  // join aggregate agg has the correct size.
173  LO agg = vertex2AggId(nei, 0);
174  const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
175  1);
176  if (aggSize < maxAggSize) {
177  // assign vertex i to aggregate with root j
178  vertex2AggId(nodeIdx, 0) = agg;
179  procWinner(nodeIdx, 0) = myRank;
180  aggStat(nodeIdx) = AGGREGATED;
181  ++lNumAggregatedNodes;
182  break;
183  } else {
184  // Decrement back the value of aggSizesView(agg)
185  Kokkos::atomic_decrement(&aggSizesView(agg));
186  }
187  }
188  }
189  }
190  // if(aggStat(nodeIdx) != AGGREGATED) {
191  // lNumNonAggregatedNodes++;
192  if (aggStat(nodeIdx) == NOTSEL) {
193  aggStat(nodeIdx) = READY;
194  }
195  // }
196  },
197  tmpNumAggregatedNodes);
198  numAggregatedNodes += tmpNumAggregatedNodes;
199  numNonAggregatedNodes -= numAggregatedNodes;
200 
201  // update aggregate object
202  aggregates.SetNumAggregates(numLocalAggregates);
203 }
204 
205 template <class LocalOrdinal, class GlobalOrdinal, class Node>
208  const LWGraph_kokkos& graph,
209  Aggregates& aggregates,
210  Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
211  LO& numNonAggregatedNodes) const {
212  const LO numRows = graph.GetNodeNumVertices();
213  const int myRank = graph.GetComm()->getRank();
214 
215  auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
216  auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217  auto colors = aggregates.GetGraphColors();
218 
219  auto lclLWGraph = graph;
220 
221  LO numLocalAggregates = aggregates.GetNumAggregates();
222  Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
223  {
224  auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
225  h_nla() = numLocalAggregates;
226  Kokkos::deep_copy(numLocalAggregatesView, h_nla);
227  }
228 
229  Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
230  Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
231  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
232 
233  // first loop build the set of new roots
234  Kokkos::parallel_for(
235  "Aggregation Phase 1: building list of new roots",
236  Kokkos::RangePolicy<execution_space>(0, numRows),
237  KOKKOS_LAMBDA(const LO i) {
238  if (colors(i) == 1 && aggStat(i) == READY) {
239  // i will become a root
240  newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
241  }
242  });
243  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
244  // sort new roots by LID to guarantee determinism in agg IDs
245  Kokkos::sort(newRoots, 0, h_numNewRoots());
246  LO numAggregated = 0;
247  Kokkos::parallel_reduce(
248  "Aggregation Phase 1: aggregating nodes",
249  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
250  KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) {
251  LO root = newRoots(rootIndex);
252  LO aggID = numLocalAggregatesView() + rootIndex;
253  LO aggSize = 1;
254  vertex2AggId(root, 0) = aggID;
255  procWinner(root, 0) = myRank;
256  aggStat(root) = AGGREGATED;
257  auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
258  for (LO n = 0; n < neighOfRoot.length; n++) {
259  LO neigh = neighOfRoot(n);
260  if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) {
261  // add neigh to aggregate
262  vertex2AggId(neigh, 0) = aggID;
263  procWinner(neigh, 0) = myRank;
264  aggStat(neigh) = AGGREGATED;
265  aggSize++;
266  if (aggSize == maxAggSize) {
267  // can't add any more nodes
268  break;
269  }
270  }
271  }
272  lnumAggregated += aggSize;
273  },
274  numAggregated);
275  numNonAggregatedNodes -= numAggregated;
276  // update aggregate object
277  aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
278 }
279 
280 } // namespace MueLu
281 
282 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Timer to be used in non-factories.
Exception throws to report errors in the internal logical of the program.
const RCP< const Teuchos::Comm< int > > GetComm() const
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.