46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
56 #include "MueLu_Aggregates.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "Kokkos_Sort.hpp"
65 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
70 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71 LO& numNonAggregatedNodes)
const {
72 if (params.
get<
bool>(
"aggregation: deterministic")) {
73 Monitor m(*
this,
"BuildAggregatesDeterministic");
74 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76 Monitor m(*
this,
"BuildAggregatesRandom");
77 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
82 template <
class LO,
class GO,
class Node>
87 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
88 LO& numNonAggregatedNodes)
const {
89 const int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
90 const int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
91 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2a");
94 const int myRank = graph.
GetComm()->getRank();
96 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
97 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
101 auto lclLWGraph = graph;
103 LO numLocalNodes = numRows;
104 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
106 const double aggFactor = 0.5;
107 double factor =
static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
108 factor = pow(factor, aggFactor);
114 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
115 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
116 Kokkos::create_mirror_view(numLocalAggregates);
118 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
122 for (
int color = 2; color < numColors + 1; ++color) {
123 LO tmpNumNonAggregatedNodes = 0;
124 Kokkos::parallel_reduce(
125 "Aggregation Phase 2a: loop over each individual color",
126 Kokkos::RangePolicy<execution_space>(0, numRows),
127 KOKKOS_LAMBDA(
const LO rootCandidate,
LO& lNumNonAggregatedNodes) {
128 if (aggStat(rootCandidate) ==
READY &&
129 colors(rootCandidate) == color) {
132 if (matchMLbehavior) {
137 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
142 for (
int j = 0; j < neighbors.length; ++j) {
143 LO neigh = neighbors(j);
144 if (neigh != rootCandidate) {
145 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
146 (aggStat(neigh) ==
READY) &&
147 (aggSize < maxNodesPerAggregate)) {
156 if (aggSize > minNodesPerAggregate &&
157 (aggSize > factor * numNeighbors)) {
159 LO aggIndex = Kokkos::
160 atomic_fetch_add(&numLocalAggregates(), 1);
162 LO numAggregated = 0;
164 if (matchMLbehavior) {
167 vertex2AggId(rootCandidate, 0) = aggIndex;
168 procWinner(rootCandidate, 0) = myRank;
170 --lNumNonAggregatedNodes;
173 for (
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
174 LO neigh = neighbors(neighIdx);
175 if (neigh != rootCandidate) {
176 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
177 (aggStat(neigh) ==
READY) &&
178 (numAggregated < aggSize)) {
180 vertex2AggId(neigh, 0) = aggIndex;
181 procWinner(neigh, 0) = myRank;
184 --lNumNonAggregatedNodes;
191 tmpNumNonAggregatedNodes);
192 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
196 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
200 template <
class LO,
class GO,
class Node>
205 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
206 LO& numNonAggregatedNodes)
const {
207 const int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
208 const int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
211 const int myRank = graph.
GetComm()->getRank();
213 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
214 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto lclLWGraph = graph;
220 LO numLocalNodes = procWinner.size();
221 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
223 const double aggFactor = 0.5;
224 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
225 factor = pow(factor, aggFactor);
227 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
228 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
229 Kokkos::create_mirror_view(numLocalAggregates);
231 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
246 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
247 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
248 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
249 for (
int color = 1; color < numColors + 1; ++color) {
251 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
252 Kokkos::parallel_for(
253 "Aggregation Phase 2a: determining new roots of current color",
254 Kokkos::RangePolicy<execution_space>(0, numRows),
255 KOKKOS_LAMBDA(
const LO rootCandidate) {
256 if (aggStat(rootCandidate) ==
READY &&
257 colors(rootCandidate) == color) {
259 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
263 for (
int j = 0; j < neighbors.length; ++j) {
264 LO neigh = neighbors(j);
265 if (neigh != rootCandidate) {
266 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
267 aggStat(neigh) ==
READY &&
268 aggSize < maxNodesPerAggregate) {
276 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
277 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
278 newRoots(newRootIndex) = rootCandidate;
282 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
284 if (h_numNewRoots() > 0) {
286 Kokkos::sort(newRoots, 0, h_numNewRoots());
288 LO tmpNumNonAggregatedNodes = 0;
290 Kokkos::parallel_reduce(
291 "Aggregation Phase 2a: create new aggregates",
292 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
293 KOKKOS_LAMBDA(
const LO newRootIndex,
LO& lNumNonAggregatedNodes) {
294 LO root = newRoots(newRootIndex);
295 LO newAggID = numLocalAggregates() + newRootIndex;
296 auto neighbors = lclLWGraph.getNeighborVertices(root);
299 vertex2AggId(root, 0) = newAggID;
301 for (
int j = 0; j < neighbors.length; ++j) {
302 LO neigh = neighbors(j);
304 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
305 aggStat(neigh) ==
READY &&
306 aggSize < maxNodesPerAggregate) {
308 vertex2AggId(neigh, 0) = newAggID;
309 procWinner(neigh, 0) = myRank;
314 lNumNonAggregatedNodes -= aggSize;
316 tmpNumNonAggregatedNodes);
317 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
318 h_numLocalAggregates() += h_numNewRoots();
319 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
327 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void BuildAggregatesRandom(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregatesDeterministic(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
const RCP< const Teuchos::Comm< int > > GetComm() const
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.