46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
56 #include "MueLu_Aggregates.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
65 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
70 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71 LO& numNonAggregatedNodes)
const {
72 if (params.
get<
bool>(
"aggregation: deterministic")) {
73 Monitor m(*
this,
"BuildAggregatesDeterministic");
74 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76 Monitor m(*
this,
"BuildAggregatesRandom");
77 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
82 template <
class LO,
class GO,
class Node>
87 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
88 LO& numNonAggregatedNodes)
const {
90 const int myRank = graph.
GetComm()->getRank();
92 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
93 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
98 auto lclLWGraph = graph;
100 const LO defaultConnectWeight = 100;
101 const LO penaltyConnectWeight = 10;
103 Kokkos::View<LO*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
104 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
105 Kokkos::View<LO*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
107 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
117 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
118 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
122 for (
LO color = 1; color <= numColors; ++color) {
123 Kokkos::deep_copy(aggWeight, 0);
127 LO numAggregated = 0;
128 Kokkos::parallel_reduce(
129 "Aggregation Phase 2b: aggregates expansion",
130 Kokkos::RangePolicy<execution_space>(0, numRows),
131 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
132 if (aggStat(i) !=
READY || colors(i) != color)
135 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
136 for (
int j = 0; j < neighOfINode.length; j++) {
137 LO neigh = neighOfINode(j);
141 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
143 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
144 connectWeight(neigh));
147 int bestScore = -100000;
149 int bestConnect = -1;
151 for (
int j = 0; j < neighOfINode.length; j++) {
152 LO neigh = neighOfINode(j);
154 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
156 auto aggId = vertex2AggId(neigh, 0);
157 int score = aggWeight(aggId) - aggPenalties(aggId);
159 if (score > bestScore) {
162 bestConnect = connectWeight(neigh);
164 }
else if (aggId == bestAggId &&
165 connectWeight(neigh) > bestConnect) {
166 bestConnect = connectWeight(neigh);
170 if (bestScore >= 0) {
172 vertex2AggId(i, 0) = bestAggId;
173 procWinner(i, 0) = myRank;
175 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
176 connectWeight(i) = bestConnect - penaltyConnectWeight;
181 numNonAggregatedNodes -= numAggregated;
187 template <
class LO,
class GO,
class Node>
192 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
193 LO& numNonAggregatedNodes)
const {
195 const int myRank = graph.
GetComm()->getRank();
197 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
198 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
203 auto lclLWGraph = graph;
205 const int defaultConnectWeight = 100;
206 const int penaltyConnectWeight = 10;
208 Kokkos::View<int*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
209 Kokkos::View<int*, device_type> aggWeight(Kokkos::ViewAllocateWithoutInitializing(
"aggWeight"), numLocalAggregates);
210 Kokkos::View<int*, device_type> aggPenaltyUpdates(
"aggPenaltyUpdates", numLocalAggregates);
211 Kokkos::View<int*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
213 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
222 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
223 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
227 for (
LO color = 1; color <= numColors; color++) {
228 Kokkos::deep_copy(aggWeight, 0);
232 LO numAggregated = 0;
233 Kokkos::parallel_for(
234 "Aggregation Phase 2b: updating agg weights",
235 Kokkos::RangePolicy<execution_space>(0, numRows),
236 KOKKOS_LAMBDA(
const LO i) {
237 if (aggStat(i) !=
READY || colors(i) != color)
239 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
240 for (
int j = 0; j < neighOfINode.length; j++) {
241 LO neigh = neighOfINode(j);
244 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
246 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
247 connectWeight(neigh));
251 Kokkos::parallel_reduce(
252 "Aggregation Phase 2b: aggregates expansion",
253 Kokkos::RangePolicy<execution_space>(0, numRows),
254 KOKKOS_LAMBDA(
const LO i,
LO& tmpNumAggregated) {
255 if (aggStat(i) !=
READY || colors(i) != color)
257 int bestScore = -100000;
259 int bestConnect = -1;
261 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
262 for (
int j = 0; j < neighOfINode.length; j++) {
263 LO neigh = neighOfINode(j);
265 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
267 auto aggId = vertex2AggId(neigh, 0);
268 int score = aggWeight(aggId) - aggPenalties(aggId);
270 if (score > bestScore) {
273 bestConnect = connectWeight(neigh);
275 }
else if (aggId == bestAggId &&
276 connectWeight(neigh) > bestConnect) {
277 bestConnect = connectWeight(neigh);
281 if (bestScore >= 0) {
283 vertex2AggId(i, 0) = bestAggId;
284 procWinner(i, 0) = myRank;
286 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
287 connectWeight(i) = bestConnect - penaltyConnectWeight;
293 Kokkos::parallel_for(
294 "Aggregation Phase 2b: updating agg penalties",
295 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
296 KOKKOS_LAMBDA(
const LO agg) {
297 aggPenalties(agg) += aggPenaltyUpdates(agg);
298 aggPenaltyUpdates(agg) = 0;
300 numNonAggregatedNodes -= numAggregated;
306 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void BuildAggregatesRandom(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregatesDeterministic(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
const RCP< const Teuchos::Comm< int > > GetComm() const