46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
54 #include <Xpetra_Vector.hpp>
58 #include "MueLu_Aggregates_kokkos.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
// Entry point for this aggregation phase: reads the boolean parameter
// "aggregation: deterministic" and forwards every argument to either
// BuildAggregatesDeterministic or BuildAggregatesRandom.
//
// NOTE(review): this listing appears to have dropped lines (the embedded
// listing numbers skip, e.g. 71 -> 73 and 77 -> 79). `aggStat` is forwarded
// below but its parameter declaration is not visible here, and the `else`
// keyword and the closing braces are not visible either. Confirm against the
// upstream file before changing any code in this block.
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
68 void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
// The deterministic variant is chosen when the parameter evaluates to true.
75 if(params.get<
bool>(
"aggregation: deterministic")) {
// Monitor object labelled with the name of the variant being run
// (presumably scoped timing/logging output -- TODO confirm against Monitor).
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Presumably the `else` branch (the keyword itself is on a dropped line):
// same pattern, but labelled for and dispatching to the random variant.
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Non-deterministic variant: for each color, every vertex that is READY and
// has that color scores the aggregates of its neighbors and, when the best
// score is non-negative, joins the best one.
//
// Visible effects on the arguments:
//   - vertex2AggId(i, 0) and procWinner(i, 0) are written for each vertex that
//     joins an aggregate (procWinner receives this process's rank);
//   - numNonAggregatedNodes is decreased by numAggregated after each color.
//
// aggPenalties is both read (when scoring) and atomically incremented inside
// the same lambda, so the outcome can depend on the order in which vertices
// are processed -- presumably the reason this variant is the non-deterministic
// one; compare with BuildAggregatesDeterministic, which defers the update.
//
// NOTE(review): this listing has dropped lines (embedded listing numbers
// skip). Not visible here: the `aggStat` parameter, the declarations of
// maxIters / aggWeight / connectWeight / aggPenalties / bestAggId, the kernel
// launch statement, the second half of both neighbor conditions, and most
// closing braces. Confirm against the upstream file before editing code.
85 template <
class LO,
class GO,
class Node>
86 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
const {
93 const LO numRows = graph.GetNodeNumVertices();
94 const int myRank = graph.GetComm()->getRank();
// Local views of the per-vertex aggregate id and owning-rank vectors.
96 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
97 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
98 auto colors = aggregates.GetGraphColors();
99 const LO numColors = aggregates.GetGraphNumColors();
100 const LO numLocalAggregates = aggregates.GetNumAggregates();
// defaultConnectWeight is not used in the visible lines (presumably it seeds
// connectWeight on a dropped line -- TODO confirm). penaltyConnectWeight is
// subtracted from the connect weight of a vertex that joins an aggregate.
102 const LO defaultConnectWeight = 100;
103 const LO penaltyConnectWeight = 10;
// When the size cap equals INT_MAX (i.e. no effective cap), only a single
// sweep over the colors is performed. maxIters is declared on a dropped line.
119 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
120 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121 for (
int iter = 0;
iter < maxIters; ++
iter) {
// Colors are visited from 1 to numColors inclusive.
122 for(LO color = 1; color <= numColors; ++color) {
127 LO numAggregated = 0;
// Per-vertex kernel body; the second argument is a by-reference accumulator
// (presumably the reduction value feeding numAggregated -- the launch
// statement is not visible here).
130 KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated) {
// Only READY vertices of the current color are processed; the statement
// guarded by this test is on a dropped line (presumably an early return).
131 if (aggStat(i) !=
READY || colors(i) != color)
// Pass 1: add each qualifying neighbor's connectWeight to the weight of the
// aggregate that neighbor belongs to. The rest of the condition after `&&`
// is on a dropped line.
134 auto neighOfINode = graph.getNeighborVertices(i);
135 for (
int j = 0; j < neighOfINode.length; j++) {
136 LO neigh = neighOfINode(j);
140 if (graph.isLocalNeighborVertex(neigh) &&
142 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143 connectWeight(neigh));
// Pass 2: pick the neighbor aggregate with the highest score, where
// score = aggWeight - aggPenalties. bestAggId is declared on a dropped line.
146 int bestScore = -100000;
148 int bestConnect = -1;
150 for (
int j = 0; j < neighOfINode.length; j++) {
151 LO neigh = neighOfINode(j);
153 if (graph.isLocalNeighborVertex(neigh) &&
155 auto aggId = vertex2AggId(neigh, 0);
156 int score = aggWeight(aggId) - aggPenalties(aggId);
// A strictly better score replaces the current candidate; a further neighbor
// in the same best aggregate only raises bestConnect if its weight is larger.
158 if (score > bestScore) {
161 bestConnect = connectWeight(neigh);
163 }
else if (aggId == bestAggId &&
164 connectWeight(neigh) > bestConnect) {
165 bestConnect = connectWeight(neigh);
// Join the chosen aggregate only when its score is non-negative.
169 if (bestScore >= 0) {
171 vertex2AggId(i, 0) = bestAggId;
172 procWinner(i, 0) = myRank;
// Penalize the aggregate that just grew, and give the new member a connect
// weight penaltyConnectWeight below the best weight it attached through.
174 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
175 connectWeight(i) = bestConnect - penaltyConnectWeight;
// Account for the vertices aggregated while processing this color.
179 numNonAggregatedNodes -= numAggregated;
// Deterministic variant: same scoring rule as BuildAggregatesRandom, but the
// work for each color is split into three lambdas separated by
// execution_space().fence() calls:
//   1. accumulate neighbor connect weights into aggWeight;
//   2. choose the best aggregate per vertex, recording penalty increments in
//      aggPenaltyUpdates instead of changing aggPenalties directly;
//   3. fold aggPenaltyUpdates into aggPenalties and reset it to zero.
// In the visible lines, aggPenalties is therefore only read while vertices are
// choosing, and only written in the third lambda.
//
// Visible effects on the arguments:
//   - vertex2AggId(i, 0) and procWinner(i, 0) are written for each vertex that
//     joins an aggregate (procWinner receives this process's rank);
//   - numNonAggregatedNodes is decreased by numAggregated after each color.
//
// NOTE(review): this listing has dropped lines (embedded listing numbers
// skip). Not visible here: the `aggStat` parameter, the declarations of
// maxIters / aggWeight / connectWeight / aggPenalties / aggPenaltyUpdates /
// bestAggId, the kernel launch statements, the second half of the neighbor
// conditions, and most braces. Confirm against the upstream file before
// editing code.
187 template <
class LO,
class GO,
class Node>
188 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
189 BuildAggregatesDeterministic(
const ParameterList& params,
190 const LWGraph_kokkos& graph,
191 Aggregates_kokkos& aggregates,
193 LO& numNonAggregatedNodes)
const {
195 const LO numRows = graph.GetNodeNumVertices();
196 const int myRank = graph.GetComm()->getRank();
// Local views of the per-vertex aggregate id and owning-rank vectors.
198 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
199 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
200 auto colors = aggregates.GetGraphColors();
201 const LO numColors = aggregates.GetGraphNumColors();
202 LO numLocalAggregates = aggregates.GetNumAggregates();
// defaultConnectWeight is not used in the visible lines (presumably it seeds
// connectWeight on a dropped line -- TODO confirm). penaltyConnectWeight is
// subtracted from the connect weight of a vertex that joins an aggregate.
204 const int defaultConnectWeight = 100;
205 const int penaltyConnectWeight = 10;
// When the size cap equals INT_MAX (i.e. no effective cap), only a single
// sweep over the colors is performed. maxIters is declared on a dropped line.
221 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
222 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
223 for (
int iter = 0;
iter < maxIters; ++
iter) {
// Colors are visited from 1 to numColors inclusive.
224 for(LO color = 1; color <= numColors; color++) {
229 LO numAggregated = 0;
// Lambda 1: weight accumulation. Only READY vertices of the current color
// take part; the statement guarded by the test is on a dropped line
// (presumably an early return).
232 KOKKOS_LAMBDA (
const LO i)
234 if (aggStat(i) !=
READY || colors(i) != color)
// Add each qualifying neighbor's connectWeight to the weight of the aggregate
// that neighbor belongs to. The rest of the condition is on a dropped line.
236 auto neighOfINode = graph.getNeighborVertices(i);
237 for (
int j = 0; j < neighOfINode.length; j++) {
238 LO neigh = neighOfINode(j);
241 if (graph.isLocalNeighborVertex(neigh) &&
243 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
244 connectWeight(neigh));
// Fence between the accumulation lambda and the selection lambda.
247 execution_space().fence();
// Lambda 2: selection. The second argument is a by-reference accumulator
// (presumably the reduction value feeding numAggregated -- the launch
// statement is not visible here).
250 KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated)
252 if (aggStat(i) !=
READY || colors(i) != color)
// Pick the neighbor aggregate with the highest score, where
// score = aggWeight - aggPenalties. bestAggId is declared on a dropped line.
254 int bestScore = -100000;
256 int bestConnect = -1;
258 auto neighOfINode = graph.getNeighborVertices(i);
259 for (
int j = 0; j < neighOfINode.length; j++) {
260 LO neigh = neighOfINode(j);
262 if (graph.isLocalNeighborVertex(neigh) &&
264 auto aggId = vertex2AggId(neigh, 0);
265 int score = aggWeight(aggId) - aggPenalties(aggId);
// A strictly better score replaces the current candidate; a further neighbor
// in the same best aggregate only raises bestConnect if its weight is larger.
267 if (score > bestScore) {
270 bestConnect = connectWeight(neigh);
272 }
else if (aggId == bestAggId &&
273 connectWeight(neigh) > bestConnect) {
274 bestConnect = connectWeight(neigh);
// Join the chosen aggregate only when its score is non-negative.
278 if (bestScore >= 0) {
280 vertex2AggId(i, 0) = bestAggId;
281 procWinner(i, 0) = myRank;
// Record the penalty increment in the side buffer rather than in aggPenalties
// itself; it is applied by the third lambda below.
283 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
284 connectWeight(i) = bestConnect - penaltyConnectWeight;
// Fence between the selection lambda and the penalty update lambda.
288 execution_space().fence();
// Lambda 3: per aggregate, apply the buffered penalty increments and clear
// the buffer for the next color.
291 KOKKOS_LAMBDA (
const LO agg)
293 aggPenalties(agg) += aggPenaltyUpdates(agg);
294 aggPenaltyUpdates(agg) = 0;
// Account for the vertices aggregated while processing this color.
296 numNonAggregatedNodes -= numAggregated;
302 #endif // HAVE_MUELU_KOKKOS_REFACTOR
303 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
// NOTE(review): the three signatures below follow the include guard's closing
// #endif and have neither a body nor a terminating semicolon. They look like
// reference signatures for Kokkos routines (deep_copy, parallel_reduce,
// parallel_for) left behind by a documentation/listing tool rather than part
// of this header -- confirm against the upstream file and remove if so.
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)