46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
56 #include "MueLu_Aggregates.hpp"
58 #include "MueLu_LWGraph.hpp"
65 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
67 Monitor m(*
this,
"BuildAggregatesNonKokkos");
68 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2b");
71 const int myRank = graph.
GetComm()->getRank();
78 const int defaultConnectWeight = 100;
79 const int penaltyConnectWeight = 10;
81 std::vector<int> aggWeight(numLocalAggregates, 0);
82 std::vector<int> connectWeight(numRows, defaultConnectWeight);
83 std::vector<int> aggPenalties(numRows, 0);
91 for (
int k = 0; k < 2; k++) {
92 for (
LO i = 0; i < numRows; i++) {
93 if (aggStat[i] !=
READY)
98 for (
int j = 0; j < neighOfINode.length; j++) {
99 LO neigh = neighOfINode(j);
103 aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
106 int bestScore = -100000;
108 int bestConnect = -1;
110 for (
int j = 0; j < neighOfINode.length; j++) {
111 LO neigh = neighOfINode(j);
112 int aggId = vertex2AggId[neigh];
116 int score = aggWeight[aggId] - aggPenalties[aggId];
118 if (score > bestScore) {
121 bestConnect = connectWeight[neigh];
123 }
else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
124 bestConnect = connectWeight[neigh];
128 aggWeight[aggId] = 0;
132 if (bestScore >= 0) {
134 vertex2AggId[i] = bestAggId;
135 procWinner[i] = myRank;
137 numNonAggregatedNodes--;
139 aggPenalties[bestAggId]++;
140 connectWeight[i] = bestConnect - penaltyConnectWeight;
148 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
154 LO& numNonAggregatedNodes)
const {
155 if (params.
get<
bool>(
"aggregation: deterministic")) {
156 Monitor m(*
this,
"BuildAggregatesDeterministic");
157 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
159 Monitor m(*
this,
"BuildAggregatesRandom");
160 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
165 template <
class LO,
class GO,
class Node>
171 LO& numNonAggregatedNodes)
const {
176 const int myRank = graph.
GetComm()->getRank();
178 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
179 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
184 auto lclLWGraph = graph;
186 const LO defaultConnectWeight = 100;
187 const LO penaltyConnectWeight = 10;
189 Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
190 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
191 Kokkos::View<LO*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
193 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
203 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
204 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
208 for (
LO color = 1; color <= numColors; ++color) {
209 Kokkos::deep_copy(aggWeight, 0);
213 LO numAggregated = 0;
214 Kokkos::parallel_reduce(
215 "Aggregation Phase 2b: aggregates expansion",
216 Kokkos::RangePolicy<execution_space>(0, numRows),
217 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
218 if (aggStat(i) !=
READY || colors(i) != color)
221 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
222 for (
int j = 0; j < neighOfINode.length; j++) {
223 LO neigh = neighOfINode(j);
227 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
229 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
230 connectWeight(neigh));
233 int bestScore = -100000;
235 int bestConnect = -1;
237 for (
int j = 0; j < neighOfINode.length; j++) {
238 LO neigh = neighOfINode(j);
240 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
242 auto aggId = vertex2AggId(neigh, 0);
243 int score = aggWeight(aggId) - aggPenalties(aggId);
245 if (score > bestScore) {
248 bestConnect = connectWeight(neigh);
250 }
else if (aggId == bestAggId &&
251 connectWeight(neigh) > bestConnect) {
252 bestConnect = connectWeight(neigh);
256 if (bestScore >= 0) {
258 vertex2AggId(i, 0) = bestAggId;
259 procWinner(i, 0) = myRank;
261 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
262 connectWeight(i) = bestConnect - penaltyConnectWeight;
267 numNonAggregatedNodes -= numAggregated;
273 template <
class LO,
class GO,
class Node>
279 LO& numNonAggregatedNodes)
const {
284 const int myRank = graph.
GetComm()->getRank();
286 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
287 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
292 auto lclLWGraph = graph;
294 const int defaultConnectWeight = 100;
295 const int penaltyConnectWeight = 10;
297 Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
298 Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
299 Kokkos::View<int*, device_type> aggPenaltyUpdates(
"aggPenaltyUpdates", numLocalAggregates);
300 Kokkos::View<int*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
302 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
311 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
312 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
316 for (
LO color = 1; color <= numColors; color++) {
317 Kokkos::deep_copy(aggWeight, 0);
321 LO numAggregated = 0;
322 Kokkos::parallel_for(
323 "Aggregation Phase 2b: updating agg weights",
324 Kokkos::RangePolicy<execution_space>(0, numRows),
325 KOKKOS_LAMBDA(
const LO i) {
326 if (aggStat(i) !=
READY || colors(i) != color)
328 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
329 for (
int j = 0; j < neighOfINode.length; j++) {
330 LO neigh = neighOfINode(j);
333 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
335 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
336 connectWeight(neigh));
340 Kokkos::parallel_reduce(
341 "Aggregation Phase 2b: aggregates expansion",
342 Kokkos::RangePolicy<execution_space>(0, numRows),
343 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
344 if (aggStat(i) !=
READY || colors(i) != color)
346 int bestScore = -100000;
348 int bestConnect = -1;
350 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
351 for (
int j = 0; j < neighOfINode.length; j++) {
352 LO neigh = neighOfINode(j);
354 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
356 auto aggId = vertex2AggId(neigh, 0);
357 int score = aggWeight(aggId) - aggPenalties(aggId);
359 if (score > bestScore) {
362 bestConnect = connectWeight(neigh);
364 }
else if (aggId == bestAggId &&
365 connectWeight(neigh) > bestConnect) {
366 bestConnect = connectWeight(neigh);
370 if (bestScore >= 0) {
372 vertex2AggId(i, 0) = bestAggId;
373 procWinner(i, 0) = myRank;
375 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
376 connectWeight(i) = bestConnect - penaltyConnectWeight;
382 Kokkos::parallel_for(
383 "Aggregation Phase 2b: updating agg penalties",
384 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
385 KOKKOS_LAMBDA(
const LO agg) {
386 aggPenalties(agg) += aggPenaltyUpdates(agg);
387 aggPenaltyUpdates(agg) = 0;
389 numNonAggregatedNodes -= numAggregated;
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename device_type::execution_space execution_space
void BuildAggregatesDeterministic(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesNonKokkos(const ParameterList ¶ms, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Timer to be used in non-factories.
Lightweight MueLu representation of a compressed row storage graph.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType