10 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
20 #include "MueLu_Aggregates.hpp"
22 #include "MueLu_LWGraph.hpp"
29 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
31 Monitor m(*
this,
"BuildAggregatesNonKokkos");
32 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2b");
35 const int myRank = graph.
GetComm()->getRank();
42 const int defaultConnectWeight = 100;
43 const int penaltyConnectWeight = 10;
45 std::vector<int> aggWeight(numLocalAggregates, 0);
46 std::vector<int> connectWeight(numRows, defaultConnectWeight);
47 std::vector<int> aggPenalties(numRows, 0);
55 for (
int k = 0; k < 2; k++) {
56 for (
LO i = 0; i < numRows; i++) {
57 if (aggStat[i] !=
READY)
62 for (
int j = 0; j < neighOfINode.length; j++) {
63 LO neigh = neighOfINode(j);
67 aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
70 int bestScore = -100000;
74 for (
int j = 0; j < neighOfINode.length; j++) {
75 LO neigh = neighOfINode(j);
76 int aggId = vertex2AggId[neigh];
80 int score = aggWeight[aggId] - aggPenalties[aggId];
82 if (score > bestScore) {
85 bestConnect = connectWeight[neigh];
87 }
else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88 bestConnect = connectWeight[neigh];
98 vertex2AggId[i] = bestAggId;
99 procWinner[i] = myRank;
101 numNonAggregatedNodes--;
103 aggPenalties[bestAggId]++;
104 connectWeight[i] = bestConnect - penaltyConnectWeight;
112 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 LO& numNonAggregatedNodes)
const {
119 if (params.
get<
bool>(
"aggregation: deterministic")) {
120 Monitor m(*
this,
"BuildAggregatesDeterministic");
121 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
123 Monitor m(*
this,
"BuildAggregatesRandom");
124 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
129 template <
class LO,
class GO,
class Node>
135 LO& numNonAggregatedNodes)
const {
140 const int myRank = graph.
GetComm()->getRank();
142 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
143 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
148 auto lclLWGraph = graph;
150 const LO defaultConnectWeight = 100;
151 const LO penaltyConnectWeight = 10;
153 Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
154 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
155 Kokkos::View<LO*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
157 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
167 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
168 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
172 for (
LO color = 1; color <= numColors; ++color) {
173 Kokkos::deep_copy(aggWeight, 0);
177 LO numAggregated = 0;
178 Kokkos::parallel_reduce(
179 "Aggregation Phase 2b: aggregates expansion",
180 Kokkos::RangePolicy<execution_space>(0, numRows),
181 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
182 if (aggStat(i) !=
READY || colors(i) != color)
185 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
186 for (
int j = 0; j < neighOfINode.length; j++) {
187 LO neigh = neighOfINode(j);
191 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
193 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
194 connectWeight(neigh));
197 int bestScore = -100000;
199 int bestConnect = -1;
201 for (
int j = 0; j < neighOfINode.length; j++) {
202 LO neigh = neighOfINode(j);
204 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
206 auto aggId = vertex2AggId(neigh, 0);
207 int score = aggWeight(aggId) - aggPenalties(aggId);
209 if (score > bestScore) {
212 bestConnect = connectWeight(neigh);
214 }
else if (aggId == bestAggId &&
215 connectWeight(neigh) > bestConnect) {
216 bestConnect = connectWeight(neigh);
220 if (bestScore >= 0) {
222 vertex2AggId(i, 0) = bestAggId;
223 procWinner(i, 0) = myRank;
225 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
226 connectWeight(i) = bestConnect - penaltyConnectWeight;
231 numNonAggregatedNodes -= numAggregated;
237 template <
class LO,
class GO,
class Node>
243 LO& numNonAggregatedNodes)
const {
248 const int myRank = graph.
GetComm()->getRank();
250 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
251 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
256 auto lclLWGraph = graph;
258 const int defaultConnectWeight = 100;
259 const int penaltyConnectWeight = 10;
261 Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
262 Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
263 Kokkos::View<int*, device_type> aggPenaltyUpdates(
"aggPenaltyUpdates", numLocalAggregates);
264 Kokkos::View<int*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
266 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
275 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
276 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
280 for (
LO color = 1; color <= numColors; color++) {
281 Kokkos::deep_copy(aggWeight, 0);
285 LO numAggregated = 0;
286 Kokkos::parallel_for(
287 "Aggregation Phase 2b: updating agg weights",
288 Kokkos::RangePolicy<execution_space>(0, numRows),
289 KOKKOS_LAMBDA(
const LO i) {
290 if (aggStat(i) !=
READY || colors(i) != color)
292 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
293 for (
int j = 0; j < neighOfINode.length; j++) {
294 LO neigh = neighOfINode(j);
297 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
299 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
300 connectWeight(neigh));
304 Kokkos::parallel_reduce(
305 "Aggregation Phase 2b: aggregates expansion",
306 Kokkos::RangePolicy<execution_space>(0, numRows),
307 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
308 if (aggStat(i) !=
READY || colors(i) != color)
310 int bestScore = -100000;
312 int bestConnect = -1;
314 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
315 for (
int j = 0; j < neighOfINode.length; j++) {
316 LO neigh = neighOfINode(j);
318 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
320 auto aggId = vertex2AggId(neigh, 0);
321 int score = aggWeight(aggId) - aggPenalties(aggId);
323 if (score > bestScore) {
326 bestConnect = connectWeight(neigh);
328 }
else if (aggId == bestAggId &&
329 connectWeight(neigh) > bestConnect) {
330 bestConnect = connectWeight(neigh);
334 if (bestScore >= 0) {
336 vertex2AggId(i, 0) = bestAggId;
337 procWinner(i, 0) = myRank;
339 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
340 connectWeight(i) = bestConnect - penaltyConnectWeight;
346 Kokkos::parallel_for(
347 "Aggregation Phase 2b: updating agg penalties",
348 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
349 KOKKOS_LAMBDA(
const LO agg) {
350 aggPenalties(agg) += aggPenaltyUpdates(agg);
351 aggPenaltyUpdates(agg) = 0;
353 numNonAggregatedNodes -= numAggregated;
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename device_type::execution_space execution_space
void BuildAggregatesDeterministic(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesNonKokkos(const ParameterList ¶ms, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType