MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <queue>
52 #include <vector>
53 
54 #include <Teuchos_Comm.hpp>
55 #include <Teuchos_CommHelpers.hpp>
56 
57 #include <Xpetra_Vector.hpp>
58 
60 
61 #include "MueLu_Aggregates_kokkos.hpp"
62 #include "MueLu_Exceptions.hpp"
63 #include "MueLu_LWGraph_kokkos.hpp"
64 #include "MueLu_Monitor.hpp"
65 
66 #include "Kokkos_Sort.hpp"
67 #include <Kokkos_ScatterView.hpp>
68 
69 namespace MueLu {
70 
71  template <class LocalOrdinal, class GlobalOrdinal, class Node>
72  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
73  BuildAggregates(const Teuchos::ParameterList& params,
74  const LWGraph_kokkos& graph,
75  Aggregates_kokkos& aggregates,
77  LO& numNonAggregatedNodes) const {
78 
79  int minNodesPerAggregate = params.get<int> ("aggregation: min agg size");
80  int maxNodesPerAggregate = params.get<int> ("aggregation: max agg size");
81 
82  TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
83  Exceptions::RuntimeError,
84  "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
85 
86  // Distance-2 gives less control than serial uncoupled phase 1
87  // no custom row reordering because would require making deep copy
88  // of local matrix entries and permuting it can only enforce
89  // max aggregate size
90  {
91  if(params.get<bool>("aggregation: deterministic"))
92  {
93  Monitor m(*this, "BuildAggregatesDeterministic");
94  BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
95  aggregates, aggStat, numNonAggregatedNodes);
96  } else {
97  Monitor m(*this, "BuildAggregatesRandom");
98  BuildAggregatesRandom(maxNodesPerAggregate, graph,
99  aggregates, aggStat, numNonAggregatedNodes);
100  }
101  }
102  }
103 
104  template <class LocalOrdinal, class GlobalOrdinal, class Node>
105  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
106  BuildAggregatesRandom(const LO maxAggSize,
107  const LWGraph_kokkos& graph,
108  Aggregates_kokkos& aggregates,
110  LO& numNonAggregatedNodes) const
111  {
112  const LO numRows = graph.GetNodeNumVertices();
113  const int myRank = graph.GetComm()->getRank();
114 
115  // Extract data from aggregates
116  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
117  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
118  auto colors = aggregates.GetGraphColors();
119 
120  LO numAggregatedNodes = 0;
121  LO numLocalAggregates = aggregates.GetNumAggregates();
122  Kokkos::View<LO, memory_space> aggCount("aggCount");
123  Kokkos::deep_copy(aggCount, numLocalAggregates);
124  Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1",
126  KOKKOS_LAMBDA (const LO nodeIdx) {
127  if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
128  const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129  vertex2AggId(nodeIdx, 0) = aggIdx;
130  aggStat(nodeIdx) = AGGREGATED;
131  procWinner(nodeIdx, 0) = myRank;
132  }
133  });
134  // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
135  // before updating the value of numLocalAggregates.
136  // But since we also do not want to create a host mirror of aggCount we do some trickery...
137  numAggregatedNodes -= numLocalAggregates;
138  Kokkos::deep_copy(numLocalAggregates, aggCount);
139  numAggregatedNodes += numLocalAggregates;
140 
141  // Compute the initial size of the aggregates.
142  // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
143  // at this point so we could simplify the code below a lot if this
144  // assumption is correct...
145  Kokkos::View<LO*, memory_space> aggSizesView("aggSizes", numLocalAggregates);
146  {
147  // Here there is a possibility that two vertices assigned to two different threads contribute
148  // to the same aggregate if somethings happened before phase 1?
149  auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
150  Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size",
152  KOKKOS_LAMBDA (const LO nodeIdx) {
153  auto aggSizesScatterViewAccess = aggSizesScatterView.access();
154  if(vertex2AggId(nodeIdx, 0) >= 0)
155  aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
156  });
157  Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
158  }
159 
160  LO tmpNumAggregatedNodes = 0;
161  Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes",
163  KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) {
164  if(colors(nodeIdx) != 1
165  && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
166  // Get neighbors of vertex i and look for local, aggregated,
167  // color 1 neighbor (valid root).
168  auto neighbors = graph.getNeighborVertices(nodeIdx);
169  for(LO j = 0; j < neighbors.length; ++j) {
170  auto nei = neighbors.colidx(j);
171  if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1
172  && aggStat(nei) == AGGREGATED) {
173 
174  // This atomic guarentees that any other node trying to
175  // join aggregate agg has the correct size.
176  LO agg = vertex2AggId(nei, 0);
177  const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
178  1);
179  if(aggSize < maxAggSize) {
180  //assign vertex i to aggregate with root j
181  vertex2AggId(nodeIdx, 0) = agg;
182  procWinner(nodeIdx, 0) = myRank;
183  aggStat(nodeIdx) = AGGREGATED;
184  ++lNumAggregatedNodes;
185  break;
186  } else {
187  // Decrement back the value of aggSizesView(agg)
188  Kokkos::atomic_decrement(&aggSizesView(agg));
189  }
190  }
191  }
192  }
193  // if(aggStat(nodeIdx) != AGGREGATED) {
194  // lNumNonAggregatedNodes++;
195  if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; }
196  // }
197  }, tmpNumAggregatedNodes);
198  numAggregatedNodes += tmpNumAggregatedNodes;
199  numNonAggregatedNodes -= numAggregatedNodes;
200 
201  // update aggregate object
202  aggregates.SetNumAggregates(numLocalAggregates);
203  }
204 
205  template <class LocalOrdinal, class GlobalOrdinal, class Node>
206  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
207  BuildAggregatesDeterministic(const LO maxAggSize,
208  const LWGraph_kokkos& graph,
209  Aggregates_kokkos& aggregates,
211  LO& numNonAggregatedNodes) const
212  {
213  const LO numRows = graph.GetNodeNumVertices();
214  const int myRank = graph.GetComm()->getRank();
215 
216  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
217  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
218  auto colors = aggregates.GetGraphColors();
219 
220  LO numLocalAggregates = aggregates.GetNumAggregates();
221  Kokkos::View<LO, memory_space> numLocalAggregatesView("Num aggregates");
222  {
223  auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
224  h_nla() = numLocalAggregates;
225  Kokkos::deep_copy(numLocalAggregatesView, h_nla);
226  }
227 
228  Kokkos::View<LO*, memory_space> newRoots("New root LIDs", numNonAggregatedNodes);
229  Kokkos::View<LO, memory_space> numNewRoots("Number of new aggregates of current color");
230  auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
231 
232  //first loop build the set of new roots
233  Kokkos::parallel_for("Aggregation Phase 1: building list of new roots",
235  KOKKOS_LAMBDA(const LO i)
236  {
237  if(colors(i) == 1 && aggStat(i) == READY)
238  {
239  //i will become a root
240  newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
241  }
242  });
243  Kokkos::deep_copy(h_numNewRoots, numNewRoots);
244  //sort new roots by LID to guarantee determinism in agg IDs
245  Kokkos::sort(newRoots, 0, h_numNewRoots());
246  LO numAggregated = 0;
247  Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes",
248  Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
249  KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated)
250  {
251  LO root = newRoots(rootIndex);
252  LO aggID = numLocalAggregatesView() + rootIndex;
253  LO aggSize = 1;
254  vertex2AggId(root, 0) = aggID;
255  procWinner(root, 0) = myRank;
256  aggStat(root) = AGGREGATED;
257  auto neighOfRoot = graph.getNeighborVertices(root);
258  for(LO n = 0; n < neighOfRoot.length; n++)
259  {
260  LO neigh = neighOfRoot(n);
261  if (graph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY)
262  {
263  //add neigh to aggregate
264  vertex2AggId(neigh, 0) = aggID;
265  procWinner(neigh, 0) = myRank;
266  aggStat(neigh) = AGGREGATED;
267  aggSize++;
268  if(aggSize == maxAggSize)
269  {
270  //can't add any more nodes
271  break;
272  }
273  }
274  }
275  lnumAggregated += aggSize;
276  }, numAggregated);
277  numNonAggregatedNodes -= numAggregated;
278  // update aggregate object
279  aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
280  }
281 
282 } // end namespace
283 
284 #endif // HAVE_MUELU_KOKKOS_REFACTOR
285 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)