46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
56 #include "MueLu_LWGraph.hpp"
57 #include "MueLu_Aggregates.hpp"
61 #include "Kokkos_Sort.hpp"
65 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
67 Monitor m(*
this,
"BuildAggregatesNonKokkos");
69 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
70 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
71 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2a");
74 const int myRank = graph.
GetComm()->getRank();
81 LO numLocalNodes = procWinner.
size();
82 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
84 const double aggFactor = params.
get<
double>(
"aggregation: phase2a agg factor");
87 if (matchMLbehavior) {
90 GO in_data[2] = {(
GO)numNonAggregatedNodes, (
GO)aggStat.size()};
93 GO phase_one_aggregated = out_data[1] - out_data[0];
94 factor = as<double>(phase_one_aggregated) / (out_data[1] + 1);
96 LO agg_stat_unaggregated = 0;
97 LO agg_stat_aggregated = 0;
99 for (
LO i = 0; i < (
LO)aggStat.size(); i++) {
101 agg_stat_aggregated++;
105 agg_stat_unaggregated++;
109 minNodesPerAggregate = 3;
113 factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
117 factor = pow(factor, aggFactor);
123 for (
LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
124 if (aggStat[rootCandidate] !=
READY) {
130 if (matchMLbehavior) {
131 aggList[aggSize++] = rootCandidate;
137 LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
138 for (
int j = 0; j < neighOfINode.length; j++) {
139 LO neigh = neighOfINode(j);
141 num_local_neighbors++;
143 if (neigh != rootCandidate) {
150 if (aggSize < as<size_t>(maxNodesPerAggregate))
151 aggList[aggSize++] = neigh;
152 num_nonaggd_neighbors++;
159 bool accept_aggregate;
160 if (matchMLbehavior) {
165 LO rowi_N = num_local_neighbors;
166 num_nonaggd_neighbors++;
167 accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
169 accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
172 if (accept_aggregate) {
176 aggIndex = numLocalAggregates++;
178 for (
size_t k = 0; k < aggSize; k++) {
180 vertex2AggId[aggList[k]] = aggIndex;
181 procWinner[aggList[k]] = myRank;
184 numNonAggregatedNodes -= aggSize;
192 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
198 LO& numNonAggregatedNodes)
const {
199 if (params.
get<
bool>(
"aggregation: deterministic")) {
200 Monitor m(*
this,
"BuildAggregatesDeterministic");
201 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
203 Monitor m(*
this,
"BuildAggregatesRandom");
204 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
209 template <
class LO,
class GO,
class Node>
215 LO& numNonAggregatedNodes)
const {
219 const int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
220 const int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
221 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2a");
224 const int myRank = graph.
GetComm()->getRank();
226 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
227 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
231 auto lclLWGraph = graph;
233 LO numLocalNodes = numRows;
234 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
236 const double aggFactor = 0.5;
237 double factor =
static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
238 factor = pow(factor, aggFactor);
244 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
245 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
246 Kokkos::create_mirror_view(numLocalAggregates);
248 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
252 for (
int color = 2; color < numColors + 1; ++color) {
253 LO tmpNumNonAggregatedNodes = 0;
254 Kokkos::parallel_reduce(
255 "Aggregation Phase 2a: loop over each individual color",
256 Kokkos::RangePolicy<execution_space>(0, numRows),
257 KOKKOS_LAMBDA(
const LO rootCandidate,
LO& lNumNonAggregatedNodes) {
258 if (aggStat(rootCandidate) ==
READY &&
259 colors(rootCandidate) == color) {
262 if (matchMLbehavior) {
267 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
272 for (
int j = 0; j < neighbors.length; ++j) {
273 LO neigh = neighbors(j);
274 if (neigh != rootCandidate) {
275 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
276 (aggStat(neigh) ==
READY) &&
277 (aggSize < maxNodesPerAggregate)) {
286 if (aggSize > minNodesPerAggregate &&
287 (aggSize > factor * numNeighbors)) {
289 LO aggIndex = Kokkos::
290 atomic_fetch_add(&numLocalAggregates(), 1);
292 LO numAggregated = 0;
294 if (matchMLbehavior) {
297 vertex2AggId(rootCandidate, 0) = aggIndex;
298 procWinner(rootCandidate, 0) = myRank;
300 --lNumNonAggregatedNodes;
303 for (
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
304 LO neigh = neighbors(neighIdx);
305 if (neigh != rootCandidate) {
306 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
307 (aggStat(neigh) ==
READY) &&
308 (numAggregated < aggSize)) {
310 vertex2AggId(neigh, 0) = aggIndex;
311 procWinner(neigh, 0) = myRank;
314 --lNumNonAggregatedNodes;
321 tmpNumNonAggregatedNodes);
322 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
326 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
330 template <
class LO,
class GO,
class Node>
336 LO& numNonAggregatedNodes)
const {
340 const int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
341 const int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
344 const int myRank = graph.
GetComm()->getRank();
346 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
347 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
351 auto lclLWGraph = graph;
353 LO numLocalNodes = procWinner.size();
354 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
356 const double aggFactor = 0.5;
357 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
358 factor = pow(factor, aggFactor);
360 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
361 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
362 Kokkos::create_mirror_view(numLocalAggregates);
364 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
379 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
380 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
381 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
382 for (
int color = 1; color < numColors + 1; ++color) {
384 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
385 Kokkos::parallel_for(
386 "Aggregation Phase 2a: determining new roots of current color",
387 Kokkos::RangePolicy<execution_space>(0, numRows),
388 KOKKOS_LAMBDA(
const LO rootCandidate) {
389 if (aggStat(rootCandidate) ==
READY &&
390 colors(rootCandidate) == color) {
392 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
396 for (
int j = 0; j < neighbors.length; ++j) {
397 LO neigh = neighbors(j);
398 if (neigh != rootCandidate) {
399 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
400 aggStat(neigh) ==
READY &&
401 aggSize < maxNodesPerAggregate) {
409 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
410 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
411 newRoots(newRootIndex) = rootCandidate;
415 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
417 if (h_numNewRoots() > 0) {
419 Kokkos::sort(newRoots, 0, h_numNewRoots());
421 LO tmpNumNonAggregatedNodes = 0;
423 Kokkos::parallel_reduce(
424 "Aggregation Phase 2a: create new aggregates",
425 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
426 KOKKOS_LAMBDA(
const LO newRootIndex,
LO& lNumNonAggregatedNodes) {
427 LO root = newRoots(newRootIndex);
428 LO newAggID = numLocalAggregates() + newRootIndex;
429 auto neighbors = lclLWGraph.getNeighborVertices(root);
432 vertex2AggId(root, 0) = newAggID;
434 for (
int j = 0; j < neighbors.length; ++j) {
435 LO neigh = neighbors(j);
437 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
438 aggStat(neigh) ==
READY &&
439 aggSize < maxNodesPerAggregate) {
441 vertex2AggId(neigh, 0) = newAggID;
442 procWinner(neigh, 0) = myRank;
447 lNumNonAggregatedNodes -= aggSize;
449 tmpNumNonAggregatedNodes);
450 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
451 h_numLocalAggregates() += h_numNewRoots();
452 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
void BuildAggregatesRandom(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void SetIsRoot(LO i, bool value=true)
Set root node information.
typename device_type::execution_space execution_space
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
void BuildAggregatesDeterministic(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList ¶ms, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.